commit 4873756e4ae3c76f0f90da7f3feb61442dd3f4ef
parent 4007840a37366244a2a5b5041d0ebb2853863eaf
Author: Robin <kroekerrobin@gmail.com>
Date: Sat, 27 Aug 2022 22:43:53 +0200
Start implementing finding by attribute name
Instead of only being able to find by a class
name, find instead by id name or class name
Diffstat:
| A | .gitignore | | | 3 | +++ |
| M | htex.1 | | | 18 | ++++++++++++------ |
| M | htex.c | | | 107 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- |
| A | todo | | | 2 | ++ |
4 files changed, 112 insertions(+), 18 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+test.html
+htex
+\ No newline at end of file
diff --git a/htex.1 b/htex.1
@@ -3,22 +3,28 @@
htex \- \fI\,ex\/\fRtract \fI\,ht\/\fRml
.SH SYNOPSIS
.B htex
--c \fI\,class_name\/\fR [-i] -|\fI\,filename\/\fR
+-a \fI\,attribute_name\/\fR [-i] -|\fI\,filename\/\fR
.SH DESCRIPTION
.PP
Receives text from stdin or a file
and interprets it as html. You provide
-a \fI\,class_name\/\fR via the
-.B -c
+an \fI\,attribute_name\/\fR via the
+.B -a
option and htex will write the html tag
-found by \fI\,class_name\/\fR to stdout.
+found by \fI\,attribute_name\/\fR to stdout.
Pass the
.B -i
option to only output the content (innerHTML) of the
html tag.
.TP
-\fB\,-c\/\fR, \fB\,--class\/\fR \fI\,class_name\/\fR
-Filter html by the class name of a html tag
+\fB\,-a\/\fR, \fB\,--attribute\/\fR \fI\,attribute_name\/\fR
+Filter html by the attribute name of a html tag.
+If \fI\,attribute_name\/\fR starts with a dot (.) then
+the following characters will be taken as the class name
+of a tag. If \fI\,attribute_name\/\fR starts with a hashtag (#)
+the following characters will be taken as the id name of a tag.
+If \fI\,attribute_name\/\fR starts with neither a dot nor a hashtag it
+will be taken as a tag name.
.TP
\fB\,-i\/\fR, \fB\,--innerhtml\/\fR
Instead of returning the html tag only return
diff --git a/htex.c b/htex.c
@@ -7,8 +7,9 @@
#define ONE_MILLION 1000000
char text[ONE_MILLION];
-char class_name[200];
+char attribute_name[200];
char tag_name[50];
+bool inner_html = false;
int find_start_of_opening_tag_pos(int class_position) {
int i = 1;
@@ -114,7 +115,7 @@ int find_closing_tag_pos(int open_tag_pos, bool inner_html) {
return -1;
}
-bool correct_class_name_begin_or_end(char prev_char) {
+bool correct_name_begin_or_end(char prev_char) {
switch(prev_char) {
case '"':
return true;
@@ -127,7 +128,7 @@ bool correct_class_name_begin_or_end(char prev_char) {
}
}
-void find_html_tag_by_class_name(bool inner_html) {
+void find_html_tag_by_class(char *class_name) {
int o = 0;
int failure = 0;
// int counter = 0;
@@ -155,8 +156,8 @@ void find_html_tag_by_class_name(bool inner_html) {
}
if (failure == 0) {
if (
- !correct_class_name_begin_or_end(text[k+6+o]) ||
- !correct_class_name_begin_or_end(text[k+7+o+strlen(class_name)])
+ !correct_name_begin_or_end(text[k+6+o]) ||
+ !correct_name_begin_or_end(text[k+7+o+strlen(class_name)])
) {
failure = 1;
}
@@ -193,23 +194,105 @@ void find_html_tag_by_class_name(bool inner_html) {
// printf("counter: %d\n", counter);
}
+void find_html_tag_by_id(char *id_name) { // scan the global text for id attributes matching id_name and print each hit to stdout
+ int o = 0; // offset of the character currently examined, counted from the first value character
+ int failure = 0; // set to 1 as soon as the current candidate is rejected
+ // int counter = 0;
+ int is_not_quotation_mark = 1; // cleared once a quote character is met while scanning the value
+
+ for (int k=0; k<strlen(text); k++) { // NOTE(review): strlen(text) is re-evaluated on every iteration
+ if ( // NOTE(review): any i,d byte pair passes this test, nothing checks that it is an attribute name
+ text[k] == 'i' &&
+ text[k+1] == 'd'
+ ) {
+ while (is_not_quotation_mark == 1) { // NOTE(review): the only exit is the quote check below, k+4+o is never bounds-checked
+ if (text[k+4+o] == '"' || text[k+4+o] == '\'') { // k+4 presumably skips id, the equals sign and the opening quote -- TODO confirm no spaces are expected
+ is_not_quotation_mark = 0;
+ break;
+ }
+ if (id_name[0] == text[k+4+o]) { // first character matches: compare the rest of id_name
+ for (int l=1; l<strlen(id_name); l++) {
+ if (id_name[l] != text[k+4+o+l]) {
+ failure = 1;
+ break;
+ }
+ }
+ if (failure == 0) {
+ if ( // reject the candidate unless the characters around it are accepted as name boundaries
+ !correct_name_begin_or_end(text[k+3+o]) || // character immediately before the candidate
+ !correct_name_begin_or_end(text[k+4+o+strlen(id_name)]) // character immediately after the candidate
+ ) {
+ failure = 1;
+ }
+ }
+ if (failure == 0) {
+ // counter++;
+ if (inner_html) { // global flag: print only the text between the opening and closing tag positions
+ int start_of_open_tag_pos = find_start_of_opening_tag_pos(k);
+ find_tag_name(start_of_open_tag_pos); // presumably records the tag name for find_closing_tag_pos -- helper not visible here, TODO confirm
+ int end_of_open_tag_pos = find_end_of_opening_tag_pos(k);
+ int close_tag_pos = find_closing_tag_pos(end_of_open_tag_pos, true);
+ for (int e=end_of_open_tag_pos; e<close_tag_pos; e++) { // prints nothing when close_tag_pos is not greater than the start index
+ printf("%c", text[e]);
+ }
+ printf("\n");
+ } else { // print from the start of the opening tag up to the closing tag position
+ int open_tag_pos = find_start_of_opening_tag_pos(k);
+ find_tag_name(open_tag_pos); // presumably records the tag name for find_closing_tag_pos -- TODO confirm
+ int close_tag_pos = find_closing_tag_pos(open_tag_pos, false);
+ for (int e=open_tag_pos; e<close_tag_pos; e++) {
+ printf("%c", text[e]);
+ }
+ printf("\n");
+ }
+ }
+ failure = 0; // reset so the next candidate inside the same value starts clean
+ }
+ o++; // advance to the next character of the attribute value
+ }
+ is_not_quotation_mark = 1; // re-arm the scan state for the next id occurrence
+ o = 0;
+ }
+ }
+ // printf("counter: %d\n", counter);
+}
+void find_html_tag_by_tag() { // placeholder for lookup by tag name; takes no arguments and performs no search yet
+ printf("Not yet implemented.\n"); // the only effect is this notice on stdout
+}
+
+void find_html_tag() { // pick the lookup routine from the first character of the global attribute_name
+ char identifier[200]; // receives attribute_name without its first character
+ for (int i=0; i<strlen(attribute_name); i++) {
+ identifier[i] = attribute_name[i+1]; // shift left by one; the last iteration copies the terminating NUL
+ }
+ switch(attribute_name[0]) {
+ case '.': // leading dot: the rest is a class name
+ find_html_tag_by_class(identifier);
+ break;
+ case '#': // leading hash: the rest is an id name
+ find_html_tag_by_id(identifier);
+ break;
+ default: // anything else, including an empty attribute_name, is routed to the tag-name lookup
+ find_html_tag_by_tag(); // meant to read attribute_name directly; the callee is still a stub
+ }
+}
+
int main(int argc, char *argv[]) {
int i = 0;
- bool inner_html = false;
char buffer;
int o;
static struct option long_options[] = {
- { "class", required_argument, 0, 'c' },
+ { "attribute", required_argument, 0, 'a' },
{ "innerhtml", no_argument, 0, 'i' },
{ 0, 0, 0, 0 }
};
int option_index = 0;
- while ((o = getopt_long(argc, argv, "ic:", long_options, &option_index)) != -1) {
+ while ((o = getopt_long(argc, argv, "ia:", long_options, &option_index)) != -1) {
switch(o) {
- case 'c':
+ case 'a':
for (int j=0; j<strlen(optarg); j++) {
- class_name[j] = optarg[j];
+ attribute_name[j] = optarg[j];
}
break;
case 'i':
@@ -223,7 +306,7 @@ int main(int argc, char *argv[]) {
text[i] = buffer;
i++;
}
- find_html_tag_by_class_name(inner_html);
+ find_html_tag();
} else {
int fd = open(argv[argc-1], O_RDONLY);
if (fd != -1) {
@@ -231,7 +314,7 @@ int main(int argc, char *argv[]) {
text[i] = buffer;
i++;
}
- find_html_tag_by_class_name(inner_html);
+ find_html_tag();
} else {
printf("Couldn't read file \"%s\"\n", argv[argc-1]);
}
diff --git a/todo b/todo
@@ -0,0 +1 @@
+doesn't find the tag if class="" is on a different line than the start of the tag.
+\ No newline at end of file