Implement find_html_tag_by_tag - htex - simple incorrect html parser

commit 27d0d9371aa44c9f00dbc8374ba10cc12270895d
parent 97aacdb5f6227aafc7f2ddb0553de77205c785c0
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue, 30 Aug 2022 18:03:26 +0200

Implement find_html_tag_by_tag

Diffstat:
M htex.1  | 8 ++++----
M htex.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++--
M todo  | 8 ++++++--

3 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/htex.1 b/htex.1
@@ -32,8 +32,8 @@ the content (innerHTML) of the tag
 .SH EXAMPLES
 .sp
 .RS 4
-cat test.html | htex -i -c "o-headline" -
+cat test.html | htex -i -a ".o-headline" -
 
-htex -c "o-headline" test.html
+htex -a span test.html
 
-htex --class "o-headline" test.html
-\ No newline at end of file
+htex --innerhtml --attribute "#container" test.html
+\ No newline at end of file
diff --git a/htex.c b/htex.c
@@ -37,6 +37,10 @@ int find_end_of_opening_tag_pos(int class_position) {
     }
 }
 
+/*
+    This function works only if the html tag
+    has attributes.
+*/
 void find_tag_name(int open_tag_pos) {
     int i = 1;
     int end_of_tag_name = 0;
@@ -257,7 +261,42 @@ void find_html_tag_by_id(char *id_name) {
     // printf("counter: %d\n", counter);
 }
 void find_html_tag_by_tag() {
-    printf("Not yet implemented.\n");
+    int failure = 0;
+    for (int k=0; k<strlen(text); k++) {
+        if (text[k] == '<' && text[k+1] != '/') {
+            for (int o=0; o<strlen(attribute_name); o++) {
+                if (attribute_name[o] != text[k+1+o]) {
+                    failure = 1;
+                    break;
+                }
+            }
+            if (failure == 0) {
+                if (
+                    text[k+1+strlen(attribute_name)] == '>' ||
+                    text[k+1+strlen(attribute_name)] == ' ' ||
+                    text[k+1+strlen(attribute_name)] == '\n'
+                ) {
+                    int open_tag_pos = k;
+                    int after_tag_pos = k+1+strlen(attribute_name)+1;
+                    if (inner_html) {
+                        int close_tag_pos = find_closing_tag_pos(after_tag_pos, true);
+                        int end_of_open_tag_pos = find_end_of_opening_tag_pos(k+strlen(attribute_name));
+                        for (int e=end_of_open_tag_pos; e<close_tag_pos; e++) {
+                            printf("%c", text[e]);
+                        }
+                        printf("\n");
+                    } else {
+                        int close_tag_pos = find_closing_tag_pos(after_tag_pos, false);
+                        for (int e=open_tag_pos; e<close_tag_pos; e++) {
+                            printf("%c", text[e]);
+                        }
+                        printf("\n");
+                    }
+                }
+            }
+            failure = 0;
+        }
+    }
 }
 
 void find_html_tag() {
@@ -273,7 +312,10 @@ void find_html_tag() {
             find_html_tag_by_id(identifier);
             break;
         default:
-            find_html_tag_by_tag(); // it uses attribute_name directly
+            for (int i=0; i<strlen(attribute_name); i++) {
+                tag_name[i] = attribute_name[i];
+            }
+            find_html_tag_by_tag();
     }
 }
 
diff --git a/todo b/todo
@@ -1 +1,5 @@
-doesn't find tag if class="" is in a new line different from the tag.
-\ No newline at end of file
+don't allocate one million bytes of memory; rather, make the allocation dynamic
+implement find_attribute_value_by_*
+implement filtering not only by class or id, but also by attribute selectors like .test[data="asdf"]
+improve structure of code
+make it better man 
+\ No newline at end of file

	htex - simple incorrect html parser
	git clone git://git.relim.de/htex.git
	Log \| Files \| Refs \| README

M	htex.1	\|	8	++++----
M	htex.c	\|	46	++++++++++++++++++++++++++++++++++++++++++++--
M	todo	\|	8	++++++--