htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit 27d0d9371aa44c9f00dbc8374ba10cc12270895d
parent 97aacdb5f6227aafc7f2ddb0553de77205c785c0
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue, 30 Aug 2022 18:03:26 +0200

Implement find_html_tag_by_tag

Diffstat:
M htex.1 | 8 ++++----
M htex.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
M todo | 8 ++++++--
3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/htex.1 b/htex.1 @@ -32,8 +32,8 @@ the content (innerHTML) of the tag .SH EXAMPLES .sp .RS 4 -cat test.html | htex -i -c "o-headline" - +cat test.html | htex -i -a ".o-headline" - -htex -c "o-headline" test.html +htex -a span test.html -htex --class "o-headline" test.html -\ No newline at end of file +htex --innerhtml --attribute "#container" test.html +\ No newline at end of file diff --git a/htex.c b/htex.c @@ -37,6 +37,10 @@ int find_end_of_opening_tag_pos(int class_position) { } } +/* + This function works only if the html tag + has attributes. +*/ void find_tag_name(int open_tag_pos) { int i = 1; int end_of_tag_name = 0; @@ -257,7 +261,42 @@ void find_html_tag_by_id(char *id_name) { // printf("counter: %d\n", counter); } void find_html_tag_by_tag() { - printf("Not yet implemented.\n"); + int failure = 0; + for (int k=0; k<strlen(text); k++) { + if (text[k] == '<' && text[k+1] != '/') { + for (int o=0; o<strlen(attribute_name); o++) { + if (attribute_name[o] != text[k+1+o]) { + failure = 1; + break; + } + } + if (failure == 0) { + if ( + text[k+1+strlen(attribute_name)] == '>' || + text[k+1+strlen(attribute_name)] == ' ' || + text[k+1+strlen(attribute_name)] == '\n' + ) { + int open_tag_pos = k; + int after_tag_pos = k+1+strlen(attribute_name)+1; + if (inner_html) { + int close_tag_pos = find_closing_tag_pos(after_tag_pos, true); + int end_of_open_tag_pos = find_end_of_opening_tag_pos(k+strlen(attribute_name)); + for (int e=end_of_open_tag_pos; e<close_tag_pos; e++) { + printf("%c", text[e]); + } + printf("\n"); + } else { + int close_tag_pos = find_closing_tag_pos(after_tag_pos, false); + for (int e=open_tag_pos; e<close_tag_pos; e++) { + printf("%c", text[e]); + } + printf("\n"); + } + } + } + failure = 0; + } + } } void find_html_tag() { @@ -273,7 +312,10 @@ void find_html_tag() { find_html_tag_by_id(identifier); break; default: - find_html_tag_by_tag(); // it uses attribute_name directly + for (int i=0; i<strlen(attribute_name); i++) { + 
tag_name[i] = attribute_name[i]; + } + find_html_tag_by_tag(); } } diff --git a/todo b/todo @@ -1 +1,5 @@ -doesn't find tag if class="" is in a new line different from the tag. -\ No newline at end of file +don't allocate one million bytes of memory rather make it dynamic +implement find_attribute_value_by_* +implement filtering not only by class or id, also like this .test[data="asdf"] +improve structure of code +make it better man +\ No newline at end of file