commit 61c3a984fcc4c5dd6ed8d6043cb58e9e0ab7fb59
parent 445f2fb06c2b8cac70ed2e4496d78f9b5523e3ee
Author: Robin <kroekerrobin@gmail.com>
Date: Wed, 16 Aug 2023 12:20:02 +0200
Write documentation
Diffstat:
| M | htex.1 | | | 29 | +++++++++++++++++------------ |
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/htex.1 b/htex.1
@@ -3,7 +3,7 @@
htex \- \fI\,ex\/\fRtract \fI\,ht\/\fRml
.SH SYNOPSIS
.B htex
-[PATTERN] [-e] [-i] [-t] [FILE]
+[PATTERN] [-o OUTPUT_TYPE] [-e] [FILE]
.SH DESCRIPTION
.PP
Receives text from stdin or a file
@@ -16,27 +16,32 @@ The
.I PATTERN
has the following format: <tag_name>[<attr_key>=<attr_value>]
-There are two shortcuts: .<class_name> means [class=<class_name>] and #<id_name>
+There are two shortcuts available: .<class_name> means [class=<class_name>] and #<id_name>
means [id=<id_name>]
By default the outerHTML will be written to stdout.
.TP
-\fB\,-i\/\fR, \fB\,--innerhtml\/\fR
-Return the innerHTML instead of outerHTML.
-.TP
-\fB\,-t\/\fR, \fB\,--innertext\/\fR
-Return the innerText instead of outerHTML. Warning: innerText is different from
-what a browser sees as innerText.
+\fB\,-o\/\fR, \fB\,--output\/\fR
+Specify which part of each matched HTML tag is printed to stdout.
+Possible values: \fB\,innerhtml\/\fR, \fB\,innertext\/\fR or \fB\,attr_value\/\fR.
+
+\fB\,innertext\/\fR differs from what a browser would consider innerText. Try it out yourself.
.TP
\fB\,-e\/\fR, \fB\,--except\/\fR
-FUTURE.
+Print everything except the outerHTML of the matched HTML tags.
.SH EXAMPLES
.sp
.RS 4
-cat test.html | htex -i ".o-headline"
+cat test.html | htex -o innerhtml ".o-headline"
htex span test.html
-htex --innertext "input[name=blub]" test.html
+htex -o innertext "input[name=blub]" test.html
+
+htex -o attr_value 'a[href]' test.html
-htex -t "[=someattrvalue]" test.html
+htex "[=someattrvalue]" test.html
+.SH NOTES
+This parser was written partly by reading the HTML spec at whatwg.org
+and partly by just thinking logically and testing. Don't expect too much,
+but have fun.