commit 05d47383d977a9cb3bd69ac2c9636cc0ca9e06f6
parent 52dd773f9344d02753b2b9b4dce1fa8d44f415f9
Author: Robin <kroekerrobin@gmail.com>
Date: Thu, 15 Sep 2022 22:49:26 +0200
Add -e/--except option
Diffstat:
| M | htex.1 | | | 14 | ++++++++++---- |
| M | htex.c | | | 109 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- |
| M | todo | | | 4 | +--- |
3 files changed, 105 insertions(+), 22 deletions(-)
diff --git a/htex.1 b/htex.1
@@ -3,7 +3,7 @@
htex \- \fI\,ex\/\fRtract \fI\,ht\/\fRml
.SH SYNOPSIS
.B htex
--a \fI\,attribute_name\/\fR [-i] -|\fI\,filename\/\fR
+-a \fI\,attribute_name\/\fR [-e] [-i] -|\fI\,filename\/\fR
.SH DESCRIPTION
.PP
Receives text from stdin or a file
@@ -28,7 +28,11 @@ will be taken as a tag name.
.TP
\fB\,-i\/\fR, \fB\,--innerhtml\/\fR
Instead of returning the html tag only return
-the content (innerHTML) of the tag
+the content (innerHTML) of the tag. Cannot be used together with the -e option.
+.TP
+\fB\,-e\/\fR, \fB\,--except\/\fR
+Output everything except the html tag specified in -a.
+Cannot be used together with the -i option.
.SH EXAMPLES
.sp
.RS 4
@@ -36,4 +40,6 @@ cat test.html | htex -i -a ".o-headline" -
htex -a span test.html
-htex --innerhtml --attribute "#container" test.html
-\ No newline at end of file
+htex --innerhtml --attribute "#container" test.html
+
+htex -e -a ".unnecessary-class" test.html
+\ No newline at end of file
diff --git a/htex.c b/htex.c
@@ -10,6 +10,12 @@ char *text;
char attribute_name[200];
char tag_name[50];
bool inner_html = false;
+bool except = false; // set to true by the -e/--except option; matches are then recorded instead of printed
+struct match { // one matched range of `text`, recorded only in except mode
+ int start; // index into `text` where the matched range begins (first index left out of the output)
+ int end; // index into `text` just past the matched range; except-mode output resumes here
+};
+struct match *matches; // array of recorded ranges; grown by one element per match with realloc and freed after the except-mode output is printed
int find_start_of_opening_tag_pos(int class_position) {
int i = 1;
@@ -133,7 +139,7 @@ bool correct_name_begin_or_end(char prev_char) {
void find_html_tag_by_class(char *class_name) {
int o = 0;
int failure = 0;
- // int counter = 0;
+ int counter = 0;
int is_not_quotation_mark = 1;
for (int k=0; k<strlen(text); k++) {
@@ -178,11 +184,19 @@ void find_html_tag_by_class(char *class_name) {
} else {
int open_tag_pos = find_start_of_opening_tag_pos(k);
find_tag_name(open_tag_pos);
- int close_tag_pos = find_closing_tag_pos(open_tag_pos, false);
- for (int e=open_tag_pos; e<close_tag_pos; e++) {
- printf("%c", text[e]);
+ int end_of_open_tag_pos = find_end_of_opening_tag_pos(k);
+ int close_tag_pos = find_closing_tag_pos(end_of_open_tag_pos, false);
+ if (except) {
+ matches = realloc(matches, (counter+1) * sizeof(struct match));
+ matches[counter].start = open_tag_pos;
+ matches[counter].end = close_tag_pos;
+ counter++;
+ } else {
+ for (int e=open_tag_pos; e<close_tag_pos; e++) {
+ printf("%c", text[e]);
+ }
+ printf("\n");
}
- printf("\n");
}
}
failure = 0;
@@ -193,13 +207,26 @@ void find_html_tag_by_class(char *class_name) {
o = 0;
}
}
- // printf("counter: %d\n", counter);
+ if (except) {
+ int start = 0;
+ for (int i=0; i<counter; i++) {
+ for (int e=start; e<matches[i].start; e++) {
+ printf("%c", text[e]);
+ }
+ start = matches[i].end;
+ }
+ for (int i=start; i<strlen(text); i++) {
+ printf("%c", text[i]);
+ }
+ printf("\n");
+ free(matches);
+ }
}
void find_html_tag_by_id(char *id_name) {
int o = 0;
int failure = 0;
- // int counter = 0;
+ int counter = 0;
int is_not_quotation_mark = 1;
for (int k=0; k<strlen(text); k++) {
@@ -243,10 +270,17 @@ void find_html_tag_by_id(char *id_name) {
find_tag_name(start_of_open_tag_pos);
int end_of_open_tag_pos = find_end_of_opening_tag_pos(k);
int close_tag_pos = find_closing_tag_pos(end_of_open_tag_pos, false);
- for (int e=start_of_open_tag_pos; e<close_tag_pos; e++) {
- printf("%c", text[e]);
+ if (except) {
+ matches = realloc(matches, (counter+1) * sizeof(struct match));
+ matches[counter].start = start_of_open_tag_pos;
+ matches[counter].end = close_tag_pos;
+ counter++;
+ } else {
+ for (int e=start_of_open_tag_pos; e<close_tag_pos; e++) {
+ printf("%c", text[e]);
+ }
+ printf("\n");
}
- printf("\n");
}
}
failure = 0;
@@ -257,10 +291,24 @@ void find_html_tag_by_id(char *id_name) {
o = 0;
}
}
- // printf("counter: %d\n", counter);
+ if (except) {
+ int start = 0;
+ for (int i=0; i<counter; i++) {
+ for (int e=start; e<matches[i].start; e++) {
+ printf("%c", text[e]);
+ }
+ start = matches[i].end;
+ }
+ for (int i=start; i<strlen(text); i++) {
+ printf("%c", text[i]);
+ }
+ printf("\n");
+ free(matches);
+ }
}
void find_html_tag_by_tag() {
int failure = 0;
+ int counter = 0;
for (int k=0; k<strlen(text); k++) {
if (text[k] == '<' && text[k+1] != '/') {
for (int o=0; o<strlen(attribute_name); o++) {
@@ -286,16 +334,37 @@ void find_html_tag_by_tag() {
printf("\n");
} else {
int close_tag_pos = find_closing_tag_pos(after_tag_pos, false);
- for (int e=open_tag_pos; e<close_tag_pos; e++) {
- printf("%c", text[e]);
+ if (except) {
+ matches = realloc(matches, (counter+1) * sizeof(struct match));
+ matches[counter].start = open_tag_pos;
+ matches[counter].end = close_tag_pos;
+ counter++;
+ } else {
+ for (int e=open_tag_pos; e<close_tag_pos; e++) {
+ printf("%c", text[e]);
+ }
+ printf("\n");
}
- printf("\n");
}
}
}
failure = 0;
}
}
+ if (except) {
+ int start = 0;
+ for (int i=0; i<counter; i++) {
+ for (int e=start; e<matches[i].start; e++) {
+ printf("%c", text[e]);
+ }
+ start = matches[i].end;
+ }
+ for (int i=start; i<strlen(text); i++) {
+ printf("%c", text[i]);
+ }
+ printf("\n");
+ free(matches);
+ }
}
void find_html_tag() {
@@ -331,10 +400,11 @@ int main(int argc, char *argv[]) {
static struct option long_options[] = {
{ "attribute", required_argument, 0, 'a' },
{ "innerhtml", no_argument, 0, 'i' },
+ { "except", no_argument, 0, 'e' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
- while ((o = getopt_long(argc, argv, "ia:", long_options, &option_index)) != -1) {
+ while ((o = getopt_long(argc, argv, "eia:", long_options, &option_index)) != -1) {
switch(o) {
case 'a':
for (int j=0; j<strlen(optarg); j++) {
@@ -344,8 +414,15 @@ int main(int argc, char *argv[]) {
case 'i':
inner_html = true;
break;
+ case 'e':
+ except = true;
+ break;
}
}
+ if (inner_html && except) {
+ printf("You can't use the options -i (--innerhtml) and -e (--except) at the same time.\n");
+ return -1;
+ }
if (argc == (optind + 1)) {
if (*argv[argc-1] == '-') {
while (read(0, &buffer, 1) > 0) {
@@ -357,6 +434,7 @@ int main(int argc, char *argv[]) {
return -1;
}
}
+ text[i] = '\0';
find_html_tag();
free(text);
} else {
@@ -371,6 +449,7 @@ int main(int argc, char *argv[]) {
return -1;
}
}
+ text[i] = '\0';
find_html_tag();
free(text);
} else {
diff --git a/todo b/todo
@@ -1,4 +1,3 @@
+refactor; heavy
implement find_attribute_value_by_*
implement filtering not only by class or id, also like this .test[data="asdf"]
-improve structure of code
-make it better man
-\ No newline at end of file