#!/bin/awk -f
# parasite.awk last-updated: 2026-03-01
# ~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#
# convert markdown-ish paragraphs, links, headers and
# horizontal rules into html.
#
# the script should be compatible with POSIX awk (run with -P
# to verify).
#
# ~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#
# syntax:
#
# * paragraphs are separated by empty lines
# * links: [gay sex](https://gay.sex)
# * <h1>: # headline
# * <h2>: ## headline
# * <h3>: ### headline
# * <hr> (horizontal rule): --- (three dashes on a line of their own)
#
# ~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#
# this file was written on 2026-02-26
# and is dedicated to the public domain (CC0 1.0)
#
# ~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#
# sources:
#
# => https://www.grymoire.com/Unix/Awk.html
# => https://www.gnu.org/software/gawk/manual/html_node/index.html
#
# ~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
# html_escaped(word)
#
# return a copy of `word` in which the five html-special characters are
# replaced by entity references, so the result can be placed in element
# content or inside a quoted attribute value.
#
#   &  ->  &amp;      <  ->  &lt;      >  ->  &gt;
#   "  ->  &quot;     '  ->  &#39;
#
# awk passes scalars by value, so the caller's variable is not modified.
#
# "&" has to be replaced first: the later replacements introduce new
# ampersands which must not be escaped a second time.
#
# inside a gsub() replacement a bare "&" means "the matched text"; a
# literal ampersand is therefore written "\\&" (awk reads that as \&).
function html_escaped(word) {
	gsub(/&/, "\\&amp;", word);
	gsub(/</, "\\&lt;", word);
	gsub(/>/, "\\&gt;", word);
	gsub(/"/, "\\&quot;", word);
	gsub(/'/, "\\&#39;", word);
	return word;
}
# does_match(str, pat)
#
# return 1 if the extended regular expression held in the string `pat`
# matches anywhere in `str`, otherwise 0.
#
# the test is done with the match operator, so nothing outside the
# function is touched (the earlier sub()-on-a-copy approach stored the
# copy in an undeclared, and therefore global, variable).
#
# NOTE(review): no caller is visible in this file.
function does_match(str, pat) {
	return (str ~ pat) ? 1 : 0;
}
# handle_links()
#
# rewrite every markdown link and image found in the global `line`:
#
#   [text](url)    ->  <a href="url">text</a>
#   ![text](url)   ->  <img src="url" alt="text">
#
# reads and writes the global `line`; call it without arguments. the
# names after the gap in the parameter list are locals (the only way to
# declare locals in awk). keeping the image flag local matters: as a
# global it was never reset, so one image made every later plain link
# on the following lines go down the image path and stay unconverted.
#
# the line is consumed left to right and the result is assembled by
# concatenation, so
#   * all links on the line are converted, not only the first one;
#   * generated html is never scanned again;
#   * no "&" escaping is needed for a sub() replacement string.
#
# both callers in this file build `line` from html_escaped() words, so
# text and url may already contain entity references. the url is
# normalised in a way that gives the same result for raw and for
# already-escaped input.
#
# limitations: `text` must not contain "[" or "]", and `url` must not
# contain "(" or ")".
function handle_links(    pat, rest, done, start, len, m, is_img, skip, sep, txt, alt, url, html) {
	# "[^][]" is the portable spelling of "neither ] nor [": a "]" placed
	# first in a bracket expression is literal. backslash escapes for
	# brackets inside a bracket expression are not defined by POSIX.
	pat = "!?\\[[^][]+\\]\\([^()]+\\)";

	rest = line;
	done = "";
	while (match(rest, pat)) {
		start = RSTART;
		len = RLENGTH;
		m = substr(rest, start, len);

		is_img = (substr(m, 1, 1) == "!");
		skip = is_img ? 2 : 1;       # length of the leading "![" or "["
		sep = index(m, "](");        # text holds no "]", so this is the text/url boundary

		txt = substr(m, skip + 1, sep - skip - 1);
		url = substr(m, sep + 2, len - sep - 2);    # drops the closing ")"

		# characters that may not appear raw in a url are percent-encoded,
		# whether they arrive raw or as an entity reference
		gsub(/&lt;|</, "%3C", url);
		gsub(/&gt;|>/, "%3E", url);
		gsub(/&quot;|"/, "%22", url);
		gsub(/&#39;|&apos;|'/, "%27", url);
		# fold already-escaped ampersands back, then escape each one once
		gsub(/&amp;/, "\\&", url);
		gsub(/&/, "\\&amp;", url);

		if (is_img) {
			alt = txt;
			gsub(/"/, "\\&quot;", alt);    # keep the alt attribute well-formed
			html = "<img src=\"" url "\" alt=\"" alt "\">";
		} else {
			html = "<a href=\"" url "\">" txt "</a>";
		}

		done = done substr(rest, 1, start - 1) html;
		rest = substr(rest, start + len);
	}
	line = done rest;
}
# handle_headlines()
#
# if the current record is a headline, append it to the global
# `out_html` and skip to the next record; otherwise do nothing and
# return.
#
#   # text     ->  <h1> text </h1>
#   ## text    ->  <h2> text </h2>
#   ### text   ->  <h3> text </h3>     (more than three "#" also give <h3>)
#
# a record is a headline only when its first field consists of "#"
# characters and nothing else. testing just the first character would
# accept a word such as "#tag" as a marker, and that word would then be
# dropped from the output because the text starts at field 2.
#
# every word is passed through html_escaped(); links and images in the
# headline text are converted by handle_links(), which works on the
# global `line`. all headlines but the first element of the document
# are preceded by an empty line.
#
# globals read/written: line, out_html, is_first_elem.
# `level`, `i` and `before` are locals.
#
# NOTE(review): `next` inside a function body is accepted by gawk, but
# not by every awk implementation; this function must only be called
# from a pattern rule, never from BEGIN or END.
function handle_headlines(    level, i, before) {
	if ($1 !~ /^#+$/) {
		return;
	}

	level = length($1);
	if (level > 3) {
		level = 3;
	}

	if (is_first_elem) {
		before = "";
		is_first_elem = 0;
	} else {
		before = "\n";
	}

	line = before "<h" level "> ";
	for (i = 2; i <= NF; ++i) {
		line = line html_escaped($i) " ";
	}
	handle_links();
	line = line "</h" level ">\n";

	out_html = out_html line;
	next;
}
# handle_horizontal_rule()
#
# if the current record consists of a single field made of three or
# more dashes and nothing else, append "<hr>" (surrounded by newlines)
# to the global `out_html` and skip to the next record; otherwise do
# nothing and return.
#
# the pattern is anchored on both sides: an unanchored /---/ would turn
# any one-word line that merely contains three dashes (e.g. "a---b")
# into a rule and throw the word away.
#
# NOTE(review): `next` inside a function body is accepted by gawk, but
# not by every awk implementation; this function must only be called
# from a pattern rule, never from BEGIN or END.
function handle_horizontal_rule() {
	if (NF == 1 && $1 ~ /^---+$/) {
		out_html = out_html "\n<hr>\n";
		next;
	}
}
# initial converter state, set once before the first record is read
BEGIN {
	out_html = ""          # html text accumulated so far
	is_first_elem = 1      # 1 until the first headline or paragraph has been started
	in_p = 0               # 1 while a paragraph is open
}
# rule 1: an empty or blank line, or a line holding nothing but "---".
# either one closes the paragraph that is currently open, if any.
# (a "---" line also matches rule 2 below, where the rule itself is
# emitted once the paragraph has been closed here.)
/^[[:space:]]*(---)?[[:space:]]*$/ {
	if (in_p) {
		out_html = out_html "</p>\n";
		in_p = 0;
	}
}

# rule 2: a line with visible content.
!/^[[:space:]]*$/ {
	if (!in_p) {
		# headlines and horizontal rules are only recognised outside a
		# paragraph. each helper ends with `next` when it consumes the
		# record, so execution only continues here for ordinary text.
		handle_headlines();
		handle_horizontal_rule();

		# ordinary text: open a new paragraph, separated from the
		# previous element by an empty line
		if (is_first_elem) {
			before = "";
		} else {
			before = "\n";
		}
		out_html = out_html before "<p>\n";
		in_p = 1;
		is_first_elem = 0;
	}

	# re-assemble the record from its fields: runs of whitespace collapse
	# to single spaces and every word is html-escaped
	line = "";
	for (word = 1; word <= NF; ++word) {
		line = line html_escaped($word) " ";
	}
	line = line "\n";    # keeps the source line breaks in the html; comment this out to join the lines of a paragraph
	handle_links();      # converts links and images in the global `line`
	out_html = out_html line;
}

# end of input: close a paragraph that is still open, then write the
# whole document in one go.
END {
	if (in_p) {
		out_html = out_html "</p>\n";
	}
	#print "html for " FILENAME;
	print out_html;
}