commit 0544bcf9a77afced6fe52c018fb1c4758ffde18c
parent f527a413ca2f869e114bee4d0b3db7f55dbfaf21
Author: Robin <kroekerrobin@gmail.com>
Date: Sun, 13 Aug 2023 16:23:41 +0200
Awake innerText to life
Diffstat:
| M | htex.c | | | 14 | +++++++++++++- |
| M | html.c | | | 56 | ++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
| M | html.h | | | 4 | +++- |
| M | lib.c | | | 3 | +-- |
4 files changed, 69 insertions(+), 8 deletions(-)
diff --git a/htex.c b/htex.c
@@ -133,24 +133,34 @@ int main(int argc, char *argv[])
int o = 0;
int option_index = 0;
bool isInnerHtml = false;
+ bool isInnerText = false;
bool isExcept = false;
char *text = NULL;
char *searchPattern = NULL;
static struct option long_options[] = {
{ "innerhtml", no_argument, 0, 'i' },
+ { "innertext", no_argument, 0, 't' },
{ "except", no_argument, 0, 'e' },
{ 0, 0, 0, 0 }
};
- while ((o = getopt_long(argc, argv, "ie", long_options, &option_index)) != -1) {
+ while ((o = getopt_long(argc, argv, "ite", long_options, &option_index)) != -1) {
switch(o) {
case 'i':
isInnerHtml = true;
break;
+ case 't':
+ isInnerText = true;
+ break;
case 'e':
isExcept = true;
break;
}
}
+ if (isInnerHtml && isInnerText)
+ {
+ fprintf(stderr, "Provide either --innerhtml or --innertext.\n");
+ return -1;
+ }
if (argc == optind)
{
fprintf(stderr, "Provide a search pattern!\n");
@@ -188,6 +198,8 @@ int main(int argc, char *argv[])
options->isExcept = isExcept;
if (isInnerHtml)
options->out = OUT_INNER_HTML;
+ if (isInnerText)
+ options->out = OUT_INNER_TEXT;
filterHtml(text, options);
freeOpts(options);
free(text);
diff --git a/html.c b/html.c
@@ -36,6 +36,8 @@ struct tag *initTag()
struct tag *t = malloc(sizeof(struct tag));
t->name = malloc(sizeof(char));
t->name[0] = 0;
+ t->innerText = malloc(sizeof(char));
+ t->innerText[0] = 0;
t->attrs = NULL;
t->children = NULL;
t->attrsLen = 0;
@@ -325,6 +327,8 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
state = STATE_TAG;
break;
}
+ stillOpenTag = getLastOpenTag(tagList);
+ stillOpenTag->innerText = stringCat(stillOpenTag->innerText, cpToChars(cp, ret));
break;
case STATE_TAG:
if (cp == SOLIDUS)
@@ -545,6 +549,7 @@ struct tag *parseTag(char *text, size_t offset, enum state state, struct tag_lis
void freeTag(struct tag *t)
{
free(t->name);
+ free(t->innerText);
for (int i=0; i<t->attrsLen; i++)
{
free(t->attrs[i]->name);
@@ -655,7 +660,32 @@ void printHtml(struct tag *t, int indent)
void printTag(char *text, struct tag *t, enum output_type out, struct tag_list *foundTags)
{
- unsigned int p = *(unsigned int *)t;
+ // bool doPrint = true;
+ switch (out)
+ {
+ case OUT_INNER_HTML:
+ /* for (int i=t->_innerHtmlBeginOffset; i<t->_innerHtmlEndOffset; i++)
+ {
+ doPrint = true;
+ for (int k=0; k<foundTags->len; k++)
+ {
+ if (
+ foundTags->tags[k]->_innerHtmlBeginOffset < i &&
+ foundTags->tags[k]->_innerHtmlEndOffset > i
+ )
+ doPrint = false;
+ }
+ if (doPrint)
+ putchar(text[i]);
+ } */
+ break;
+ case OUT_OUTER_HTML:
+ break;
+ case OUT_INNER_TEXT:
+ break;
+ }
+ putchar('\n');
+ /* unsigned int p = *(unsigned int *)t;
unsigned int cp;
bool isMatch = false;
for (int i=0; i<foundTags->len; i++)
@@ -679,7 +709,7 @@ void printTag(char *text, struct tag *t, enum output_type out, struct tag_list *
if (strlen(trimmedText) > 0)
printf("%s\n", trimmedText);
free(trimmedText);
- }
+ } */
for (int i=t->childrenLen-1; i>-1; i--)
{
printTag(text, t->children[i], out, foundTags);
@@ -695,19 +725,37 @@ void printResult
)
{
if (opts->isExcept)
+ {
+ /* printf("len: %ld\n", foundTags->len);
+ char *trimmedText = NULL;
+ for (int i=0; i<foundTags->len; i++)
+ {
+ trimmedText = trim(getOuterHtml(text, foundTags->tags[i]));
+ printf("%s\n", trimmedText);
+ free(trimmedText);
+ } */
printTag(text, rootTag, opts->out, foundTags);
+ }
else
{
+ char *requestedText = NULL;
char *trimmedText = NULL;
for (int i=0; i<foundTags->len; i++)
{
switch (opts->out)
{
case OUT_INNER_HTML:
- trimmedText = trim(getInnerHtml(text, foundTags->tags[i]));
+ requestedText = getInnerHtml(text, foundTags->tags[i]);
+ trimmedText = trim(requestedText);
+ free(requestedText);
break;
case OUT_OUTER_HTML:
- trimmedText = trim(getOuterHtml(text, foundTags->tags[i]));
+ requestedText = getOuterHtml(text, foundTags->tags[i]);
+ trimmedText = trim(requestedText);
+ free(requestedText);
+ break;
+ case OUT_INNER_TEXT:
+ trimmedText = trim(foundTags->tags[i]->innerText);
break;
}
if (strlen(trimmedText) > 0)
diff --git a/html.h b/html.h
@@ -23,7 +23,8 @@ const char *voidElements[] = {
enum output_type
{
OUT_INNER_HTML,
- OUT_OUTER_HTML
+ OUT_OUTER_HTML,
+ OUT_INNER_TEXT
};
struct filter_opts
@@ -46,6 +47,7 @@ struct tag
char *name;
struct attr **attrs;
struct tag **children;
+ char *innerText;
size_t attrsLen;
size_t childrenLen;
bool _isVoidElement; // means there is no closing tag
diff --git a/lib.c b/lib.c
@@ -1,4 +1,4 @@
-char *stringCat(char *str1,char *str2)
+char *stringCat(char *str1, char *str2)
{
int str1Len = 0;
int str2Len = 0;
@@ -74,7 +74,6 @@ char *trim(char *text)
}
trimmedText = realloc(trimmedText, (k+1) * sizeof(char));
trimmedText[k] = 0;
- free(text);
return trimmedText;
}