commit 445f2fb06c2b8cac70ed2e4496d78f9b5523e3ee
parent 18e781e88b49526a72b1405d9b0baeeb13e4fc58
Author: Robin <kroekerrobin@gmail.com>
Date: Wed, 16 Aug 2023 11:28:12 +0200
Support --except argument and add --output option with attr_value output type
Diffstat:
| M | htex.c | | | 45 | ++++++++++++++++++++++++++++----------------- |
| M | html.c | | | 82 | +++++++++++++++++++++++++++++++++++-------------------------------------------- |
| M | html.h | | | 3 | ++- |
| M | todo | | | 1 | - |
4 files changed, 66 insertions(+), 65 deletions(-)
diff --git a/htex.c b/htex.c
@@ -128,37 +128,48 @@ void freeOpts(struct find_opts *opt)
free(opt);
}
+enum output_type parseOutputArg(char *arg) /* Map the --output string to an output_type; returns -1 when the string is not recognised. */
+{
+ if (arg == NULL) /* no --output was given: default to outer HTML */
+ return OUT_OUTER_HTML;
+ if (strcmp(arg, "innerhtml") == 0)
+ return OUT_INNER_HTML;
+ if (strcmp(arg, "innertext") == 0)
+ return OUT_INNER_TEXT;
+ if (strcmp(arg, "attr_value") == 0)
+ return OUT_ATTR_VALUE;
+ return -1; /* not an enumerator of output_type; main compares the result against -1 to reject the argument */
+}
+
int main(int argc, char *argv[])
{
int o = 0;
int option_index = 0;
- bool isInnerHtml = false;
- bool isInnerText = false;
+ char *output = NULL;
bool isExcept = false;
char *text = NULL;
char *searchPattern = NULL;
static struct option long_options[] = {
- { "innerhtml", no_argument, 0, 'i' },
- { "innertext", no_argument, 0, 't' },
+ { "output", required_argument, 0, 'o' },
{ "except", no_argument, 0, 'e' },
{ 0, 0, 0, 0 }
};
- while ((o = getopt_long(argc, argv, "ite", long_options, &option_index)) != -1) {
+ while ((o = getopt_long(argc, argv, "o:e", long_options, &option_index)) != -1) {
switch(o) {
- case 'i':
- isInnerHtml = true;
- break;
- case 't':
- isInnerText = true;
+ case 'o':
+ output = realloc(output, (strlen(optarg)+1) * sizeof(char));
+ strcpy(output, optarg);
break;
case 'e':
isExcept = true;
break;
}
}
- if (isInnerHtml && isInnerText)
+ enum output_type out = parseOutputArg(output);
+ if (out == -1)
{
- fprintf(stderr, "Provide either --innerhtml or --innertext.\n");
+ fprintf(stderr, "Provide a valid output type!\n");
+ free(output);
return -1;
}
if (argc == optind)
@@ -190,17 +201,17 @@ int main(int argc, char *argv[])
fclose(fp);
if (strlen(text) == 0)
{
- printf("No data in file.\n");
+ fprintf(stderr, "No data in file.\n");
+ free(output);
+ free(text);
return 0;
}
}
struct find_opts *options = parseFilterOpts(searchPattern);
+ options->out = out;
options->isExcept = isExcept;
- if (isInnerHtml)
- options->out = OUT_INNER_HTML;
- if (isInnerText)
- options->out = OUT_INNER_TEXT;
filterHtml(text, options);
+ free(output);
freeOpts(options);
free(text);
return 0;
diff --git a/html.c b/html.c
@@ -676,48 +676,6 @@ void printHtml(struct tag *t, int indent)
}
}
-void printTag(char *text, struct tag *t, enum output_type out, struct tag_list *foundTags)
-{
- switch (out)
- {
- case OUT_INNER_HTML:
- break;
- case OUT_OUTER_HTML:
- break;
- case OUT_INNER_TEXT:
- break;
- }
- /* unsigned int p = *(unsigned int *)t;
- unsigned int cp;
- bool isMatch = false;
- for (int i=0; i<foundTags->len; i++)
- {
- cp = *(unsigned int *)foundTags->tags[i];
- if (p == cp)
- isMatch = true;
- }
- if (!isMatch)
- {
- char *trimmedText = NULL;
- switch (out)
- {
- case OUT_INNER_HTML:
- trimmedText = trim(getInnerHtml(text, t));
- break;
- case OUT_OUTER_HTML:
- trimmedText = trim(getOuterHtml(text, t));
- break;
- }
- if (strlen(trimmedText) > 0)
- printf("%s\n", trimmedText);
- free(trimmedText);
- } */
- for (int i=t->childrenLen-1; i>-1; i--)
- {
- printTag(text, t->children[i], out, foundTags);
- }
-}
-
void printResult
(
char *text,
@@ -728,7 +686,21 @@ void printResult
{
if (opts->isExcept)
{
- // printTag(text, rootTag, opts->out, foundTags);
+ bool isMatch = false;
+ for (int i=0; i<strlen(text); i++)
+ {
+ isMatch = false;
+ for (int k=0; k<foundTags->len; k++)
+ {
+ if (
+ foundTags->tags[k]->_outerHtmlBeginOffset <= i &&
+ foundTags->tags[k]->_outerHtmlEndOffset > i
+ )
+ isMatch = true;
+ }
+ if (!isMatch)
+ putchar(text[i]);
+ }
}
else
{
@@ -751,10 +723,28 @@ void printResult
case OUT_INNER_TEXT:
trimmedText = trim(foundTags->tags[i]->innerText);
break;
+ case OUT_ATTR_VALUE:
+ if (strlen(opts->key) > 0 && strlen(opts->tag) > 0)
+ {
+ for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
+ {
+ if (strcmp(foundTags->tags[i]->attrs[k]->name, opts->key) == 0)
+ printf("%s\n", foundTags->tags[i]->attrs[k]->value);
+ }
+ }
+ else if (strlen(opts->tag) > 0)
+ {
+ for (int k=0; k<foundTags->tags[i]->attrsLen; k++)
+ printf("%s\n", foundTags->tags[i]->attrs[k]->value);
+ }
+ break;
+ }
+ if (trimmedText)
+ {
+ if (strlen(trimmedText) > 0)
+ printf("%s\n", trimmedText);
+ free(trimmedText);
}
- if (strlen(trimmedText) > 0)
- printf("%s\n", trimmedText);
- free(trimmedText);
}
}
}
diff --git a/html.h b/html.h
@@ -24,7 +24,8 @@ enum output_type
{
OUT_INNER_HTML,
OUT_OUTER_HTML,
- OUT_INNER_TEXT
+ OUT_INNER_TEXT,
+ OUT_ATTR_VALUE
};
struct find_opts
diff --git a/todo b/todo
@@ -1 +0,0 @@
-support --except argument