commit bcc4ec1b9dcc6d5133ad5be90a95c8de364c88df
parent 633e945a287ee1fc0b03d7bab858bbc8e7cdba09
Author: Robin <kroekerrobin@gmail.com>
Date: Sun, 6 Aug 2023 16:28:57 +0200
Add basic tag finding
syntax: tag[key=value]
Diffstat:
| M | htex.c | | | 163 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- |
| M | html.c | | | 62 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
| M | html.h | | | 14 | ++++++++++++++ |
3 files changed, 208 insertions(+), 31 deletions(-)
diff --git a/htex.c b/htex.c
@@ -42,38 +42,143 @@ char *readFile(FILE *fp)
return text;
}
+/*
+ * Copies the first len bytes of src into a newly allocated, NUL-terminated
+ * string. Prints a message and terminates the program if the allocation
+ * fails, so callers never receive NULL.
+ */
+static char *copySpan(const char *src, size_t len)
+{
+    char *out = malloc(len + 1);
+    if (out == NULL)
+    {
+        fprintf(stderr, "Out of memory!\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(out, src, len);
+    out[len] = '\0';
+    return out;
+}
+
+/*
+ * Parses a search pattern into a freshly allocated struct filter_opts.
+ *
+ * Accepted forms:
+ *   name              -> tag = "name"
+ *   name[key]         -> tag = "name", key = "key"
+ *   name[key=value]   -> tag = "name", key = "key", attr = "value"
+ *   [key=value]       -> key = "key", attr = "value" (tag left empty)
+ *   .value            -> key = "class", attr = "value"
+ *   #value            -> key = "id",    attr = "value"
+ *
+ * Parsing stops at the first ']' (or the end of the string). Double quotes
+ * inside the value are dropped, so name[key="value"] equals name[key=value].
+ * For the '.' and '#' forms anything from a '[' onwards is ignored.
+ *
+ * Every field of the result is a non-NULL, NUL-terminated heap string; a
+ * field that the pattern does not set is the empty string. The caller owns
+ * the result and releases it with freeOpts(). A NULL pattern is treated as
+ * the empty pattern. The program exits if memory cannot be allocated.
+ */
+struct filter_opts *parseFilterOpts(const char *pattern)
+{
+    const char *p = (pattern != NULL) ? pattern : "";
+    bool isClassValue = false;
+    bool isIdValue = false;
+
+    switch (p[0])
+    {
+        case '.':
+            isClassValue = true;
+            p++;
+            break;
+        case '#':
+            isIdValue = true;
+            p++;
+            break;
+        default:
+            /* A label needs a statement after it before C23. */
+            break;
+    }
+
+    /* Leading name: everything up to the first '[' or ']'. */
+    size_t nameLen = strcspn(p, "[]");
+
+    /* Optional [key] or [key=value] part. */
+    const char *keyStart = "";
+    size_t keyLen = 0;
+    const char *valueStart = "";
+    size_t valueLen = 0;
+    if (p[nameLen] == '[')
+    {
+        keyStart = p + nameLen + 1;
+        keyLen = strcspn(keyStart, "=]");
+        if (keyStart[keyLen] == '=')
+        {
+            valueStart = keyStart + keyLen + 1;
+            /* Only ']' ends the value; '=' and '[' are ordinary characters. */
+            valueLen = strcspn(valueStart, "]");
+        }
+    }
+
+    struct filter_opts *opt = malloc(sizeof *opt);
+    if (opt == NULL)
+    {
+        fprintf(stderr, "Out of memory!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (isIdValue || isClassValue)
+    {
+        /* Shorthand forms: the name after '.'/'#' is the attribute value. */
+        opt->tag = copySpan("", 0);
+        opt->key = isIdValue ? copySpan("id", 2) : copySpan("class", 5);
+        opt->attr = copySpan(p, nameLen);
+        return opt;
+    }
+
+    opt->tag = copySpan(p, nameLen);
+    opt->key = copySpan(keyStart, keyLen);
+    opt->attr = copySpan(valueStart, valueLen);
+
+    /* Remove double quotes from the value in place. */
+    size_t out = 0;
+    for (size_t in = 0; in < valueLen; in++)
+    {
+        if (opt->attr[in] != '"')
+            opt->attr[out++] = opt->attr[in];
+    }
+    opt->attr[out] = '\0';
+
+    return opt;
+}
+
+/*
+ * Releases a struct filter_opts together with the three strings it owns
+ * (tag, attr, key). Passing NULL is a no-op, mirroring free().
+ */
+void freeOpts(struct filter_opts *opt)
+{
+    if (opt == NULL)
+        return;
+    free(opt->tag);
+    free(opt->attr);
+    free(opt->key);
+    free(opt);
+}
+
int main(int argc, char *argv[])
{
int o = 0;
int option_index = 0;
- char *tag = NULL;
- char *attribute = NULL;
- char *key = NULL;
bool isInnerHtml = false;
bool isExcept = false;
char *text = NULL;
+ char *searchPattern = NULL;
static struct option long_options[] = {
- { "tag", required_argument, 0, 't' },
- { "attribute", required_argument, 0, 'a' },
- { "key", required_argument, 0, 'k' },
{ "innerhtml", no_argument, 0, 'i' },
{ "except", no_argument, 0, 'e' },
{ 0, 0, 0, 0 }
};
- while ((o = getopt_long(argc, argv, "t:a:k:ie", long_options, &option_index)) != -1) {
+ while ((o = getopt_long(argc, argv, "ie", long_options, &option_index)) != -1) {
switch(o) {
- case 't':
- tag = malloc((strlen(optarg)+1) * sizeof(char));
- strcpy(tag, optarg);
- break;
- case 'a':
- attribute = malloc((strlen(optarg)+1) * sizeof(char));
- strcpy(attribute, optarg);
- break;
- case 'k':
- key = malloc((strlen(optarg)+1) * sizeof(char));
- strcpy(key, optarg);
- break;
case 'i':
isInnerHtml = true;
break;
@@ -82,17 +187,24 @@ int main(int argc, char *argv[])
break;
}
}
- if (argc > optind+1)
+ if (argc == optind)
+ {
+ fprintf(stderr, "Provide a search pattern!\n");
+ return -1;
+ }
+ if (argc > optind+2)
{
fprintf(stderr, "Provide only one file!\n");
return -1;
}
- if (argc == optind)
+ if (argc == optind+1)
{
+ searchPattern = argv[argc-1];
text = readFile(stdin);
}
- else
+ else if (argc == optind+2)
{
+ searchPattern = argv[argc-2];
char *filepath = argv[argc-1];
FILE *fp = fopen(filepath, "r");
if (fp == NULL)
@@ -108,10 +220,9 @@ int main(int argc, char *argv[])
return 0;
}
}
- parseHtml(text);
- free(tag);
- free(attribute);
- free(key);
+ struct filter_opts *options = parseFilterOpts(searchPattern);
+ filterHtml(text, options);
+ freeOpts(options);
free(text);
return 0;
}
diff --git a/html.c b/html.c
@@ -438,7 +438,58 @@ void printHtml(struct tag *t)
}
}
-void parseHtml(const char *text)
+/*
+ * Depth-first search for the first tag that satisfies the filter options.
+ *
+ * The tag itself is tested before its children, and children are visited in
+ * index order, so the first match in that order is returned.
+ *
+ * Matching rules (an empty string in opt means "not requested"):
+ *   - opt->tag set:  the tag name must equal opt->tag.
+ *   - opt->key set:  the tag must carry an attribute named opt->key.
+ *   - opt->key and opt->attr set: that same attribute's value must equal
+ *     opt->attr (exact string comparison).
+ *   - opt->attr without opt->key is ignored.
+ *   - neither opt->tag nor opt->key set: nothing matches.
+ *
+ * tag:  root of the subtree to search.
+ * list: not used by this function; kept so the signature stays unchanged.
+ * opt:  search criteria.
+ *
+ * Returns a pointer into the existing tree (nothing is allocated or copied),
+ * or NULL when no tag matches.
+ */
+struct tag *findTag(struct tag *tag, struct tag_list *list, struct filter_opts *opt)
+{
+    (void)list;
+
+    if (tag == NULL || opt == NULL)
+        return NULL;
+
+    const bool wantTag = opt->tag != NULL && opt->tag[0] != '\0';
+    const bool wantKey = opt->key != NULL && opt->key[0] != '\0';
+    const bool wantValue = wantKey && opt->attr != NULL && opt->attr[0] != '\0';
+
+    /* NULL guards are defensive: it is not visible here whether the parser
+     * can leave a name or value pointer unset. */
+    const bool matchesTag =
+        wantTag && tag->name != NULL && strcmp(tag->name, opt->tag) == 0;
+
+    bool matchesAttrKey = false;
+    bool matchesAttrKeyAndValue = false;
+    if (wantKey)
+    {
+        for (int i=0; i<tag->attrsLen; i++)
+        {
+            if (tag->attrs[i]->name == NULL ||
+                strcmp(tag->attrs[i]->name, opt->key) != 0)
+                continue;
+            matchesAttrKey = true;
+            if (!wantValue)
+                break;
+            /* Compare the value of THIS attribute only; a value belonging to
+             * a different attribute must never satisfy the filter. */
+            if (tag->attrs[i]->value != NULL &&
+                strcmp(tag->attrs[i]->value, opt->attr) == 0)
+            {
+                matchesAttrKeyAndValue = true;
+                break;
+            }
+        }
+    }
+
+    if (wantTag || wantKey)
+    {
+        const bool tagOk = !wantTag || matchesTag;
+        const bool attrOk =
+            !wantKey || (wantValue ? matchesAttrKeyAndValue : matchesAttrKey);
+        if (tagOk && attrOk)
+            return tag;
+    }
+
+    for (int i=0; i<tag->childrenLen; i++)
+    {
+        struct tag *foundTag = findTag(tag->children[i], list, opt);
+        if (foundTag != NULL)
+            return foundTag;
+    }
+    return NULL;
+}
+
+void filterHtml(const char *text, struct filter_opts *opts)
{
struct tag *rootTag;
struct tag_list *tagList = malloc(sizeof(struct tag_list));
@@ -449,10 +500,11 @@ void parseHtml(const char *text)
rootTag = parseTag(text+len, STATE_INNER_TEXT, tagList);
else
rootTag = parseTag(text, STATE_INNER_TEXT, tagList);
- printHtml(rootTag);
- /* printf("%s\n", rootTag->children[0]->children[0]->name);
- printf("%s\n", rootTag->children[0]->children[0]->children[1]->name);
- printf("%s\n", rootTag->children[0]->children[0]->children[1]->attrs[0]->value); */
+ struct tag *result = findTag(rootTag, tagList, opts);
+ if (result == NULL)
+ printError("No tag found.");
+ else
+ printf("result: %s\n", result->name);
freeTag(rootTag);
freeTagList(tagList);
}
diff --git a/html.h b/html.h
@@ -19,6 +19,13 @@ const char *voidElements[] = {
"input", "link", "meta", "source", "track", "wbr"
};
+/*
+ * Search criteria produced by parseFilterOpts() and consumed by findTag().
+ * Each field is a heap-allocated string released by freeOpts(); findTag()
+ * treats an empty string as "no constraint on this field".
+ * Note the naming: `key` is the attribute NAME, `attr` is the attribute VALUE.
+ */
+struct filter_opts
+{
+ char *tag; /* tag name, compared against a tag's name */
+ char *attr; /* attribute value, compared against an attribute's value */
+ char *key; /* attribute name, compared against an attribute's name */
+};
+
struct attr
{
char *name;
@@ -61,3 +68,10 @@ enum attr_value_syntax
AVS_APOSTROPHE,
AVS_UNQUOTED
};
+
+/* enum search_type
+{
+ ST_NO,
+ ST_LIST,
+ ST_HIERARCHY
+}; */