htex

simple incorrect html parser
git clone git://git.relim.de/htex.git
Log | Files | Refs | README

commit 1672b81239308800ba3a76a88698d8c2c99cfb9e
parent aff92fa043b11f5e9e4d4157dae664da03262400
Author: Robin <kroekerrobin@gmail.com>
Date:   Tue,  2 Apr 2024 19:43:02 +0200

Edit small things

Diffstat:
M htex.1 | 14 +++++++++-----
M htex.c |  2 ++
2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/htex.1 b/htex.1
@@ -6,9 +6,10 @@ htex \- \fI\,ex\/\fRtract \fI\,ht\/\fRml
 [-o \fI\,OUTPUT_TYPE\/\fR] [-e] [-l \fI\,NUM\/\fR] \fI\,PATTERN\/\fR [\fI\,FILE\/\fR]
 .SH DESCRIPTION
 .PP
-Receives text from stdin or a file
-and interprets it as html. htex will
-filter the html based on the
+Receives text from stdin or
+.I FILE
+and parses it as html. htex will
+filter the parsed html based on the
 .I PATTERN
 and write the result to stdout.
 
@@ -23,15 +24,18 @@ By default the outerHTML will be written to stdout.
 .TP
 \fB\,-o\/\fR, \fB\,--output\/\fR \fI\,OUTPUT_TYPE\/\fR
 Specify what part of an html tag should be printed to stdout.
-Possible values: \fB\,innerhtml\/\fR, \fB\,innertext\/\fR or \fB\,attr_value\/\fR.
+Possible values: \fB\,outerhtml\/\fR, \fB\,innerhtml\/\fR, \fB\,innertext\/\fR or \fB\,attr_value\/\fR.
 
-\fB\,innertext\/\fR is different from what a browser would consider innerText. Try out yourself.
+\fB\,innertext\/\fR is different from what a browser would consider innerText.
+See section \fB\INNER_TEXT\/\fR.
 .TP
 \fB\,-e\/\fR, \fB\,--except\/\fR
 Prints everything except the found html tags' outerHTML.
 .TP
 \fB\,-l\/\fR, \fB\,--limit\/\fR \fI\,NUM\/\fR
 Find maximum \fI\,NUM\/\fR html tags.
+.SH INNER_TEXT
+Still in progress.
 .SH EXAMPLES
 .sp
 .RS 4
diff --git a/htex.c b/htex.c
@@ -132,6 +132,8 @@ enum output_type parseOutputArg(char *arg)
 {
 	if (arg == NULL)
 		return OUT_OUTER_HTML;
+	if (strcmp(arg, "outerhtml") == 0)
+		return OUT_OUTER_HTML;
 	if (strcmp(arg, "innerhtml") == 0)
 		return OUT_INNER_HTML;
 	if (strcmp(arg, "innertext") == 0)