asw/archive/packs/pandoc/asw.lua
exe.dev user e47a9f4401 asw-v01: archive deferred content (packs, site, lab, legacy examples)
- 2.1: packs/ -> archive/packs/
- 2.2: site/ -> archive/site/
- 2.3: src/lab/ -> archive/lab/
- 2.4: examples/ -> archive/examples-legacy/ (SSI-based)
2026-06-07 10:39:21 +02:00

272 lines
8.8 KiB
Lua

-- asw.lua — Pandoc Lua filter for Agentic Semantic Web (ASW)
-- Converts GFM markdown constructs to ASW data-attribute HTML
--
-- Usage:
-- pandoc input.md --from gfm --lua-filter asw.lua --template asw.html5 -o output.html
--
-- Transforms:
-- - [ ] item → <li data-task="todo">
-- - [x] item → <li data-task="done">
-- > [!NOTE] → <div data-callout="note">
-- > [!WARNING] → <div data-callout="warning">
-- > [!TIP] → <div data-callout="tip">
-- > [!ERROR] → <div data-callout="error">
-- [[target]] → <a data-wikilink href="#target">target</a>
-- [[target|label]] → <a data-wikilink href="#target">label</a>
-- ── Callouts ─────────────────────────────────────────────────────────────────
-- GitHub callout syntax: > [!TYPE]\n> content → <div data-callout="type">
-- Maps the upper-case marker found in "[!MARKER]" to the value written into
-- the data-callout attribute. Several markers share one callout type.
local CALLOUT_MAP = {
  -- informational
  NOTE      = "note",
  INFO      = "note",
  IMPORTANT = "note",
  -- cautionary
  WARNING   = "warning",
  CAUTION   = "warning",
  -- helpful hints
  TIP       = "tip",
  -- failures / hazards
  ERROR     = "error",
  DANGER    = "error",
}
-- Human-readable title for each callout type; BlockQuote writes it into the
-- <p data-callout-title> element.
local CALLOUT_LABEL = {
  error   = "Error",
  note    = "Note",
  tip     = "Tip",
  warning = "Warning",
}
-- Pandoc filter for BlockQuote elements.
-- A blockquote whose first paragraph starts with a recognised "[!MARKER]"
-- token is replaced by a list of blocks: an opening <div data-callout="...">,
-- a title paragraph, the quote's content without the marker, and a closing
-- </div>. Any other blockquote is returned unchanged.
--
-- @param el the BlockQuote element
-- @return el itself, or a list of replacement blocks
function BlockQuote(el)
  local lead = el.content[1]
  if not lead then return el end
  if lead.t ~= "Para" and lead.t ~= "Plain" then return el end

  -- The marker must be the very first inline and a bare Str such as "[!NOTE]".
  local head = lead.content and lead.content[1]
  if not (head and head.t == "Str") then return el end

  local marker = head.text:match("^%[!(%u+)%]$")
  local kind = marker and CALLOUT_MAP[marker]
  if not kind then return el end

  local title = CALLOUT_LABEL[kind]
  if not title then
    -- Fallback: capitalise the marker itself ("NOTE" -> "Note").
    title = marker:sub(1,1) .. marker:sub(2):lower()
  end

  local out = {
    pandoc.RawBlock("html", '<div data-callout="' .. kind .. '">'),
    pandoc.RawBlock("html", '<p data-callout-title>' .. title .. '</p>'),
  }

  -- Drop the marker (index 1) and one following SoftBreak/Space, if present.
  local inlines = lead.content
  local from = 2
  local separator = inlines[from]
  if separator and (separator.t == "SoftBreak" or separator.t == "Space") then
    from = from + 1
  end

  local body = {}
  for i = from, #inlines do
    body[#body + 1] = inlines[i]
  end
  if #body > 0 then
    out[#out + 1] = pandoc.Para(body)
  end

  -- Everything after the first paragraph is carried over untouched.
  for i = 2, #el.content do
    out[#out + 1] = el.content[i]
  end

  out[#out + 1] = pandoc.RawBlock("html", '</div>')
  return out
end
-- ── Task Lists ───────────────────────────────────────────────────────────────
-- GFM: - [ ] → <li data-task="todo">, - [x] → <li data-task="done">
-- Pandoc 3.x (GFM mode) does not hand task-list checkboxes to the filter as
-- RawInline HTML (<input type="checkbox" ...>); it encodes them as a leading
-- Unicode Str: ☒ (U+2612) = checked/done, ☐ (U+2610) = unchecked/todo
local CHECKBOX_DONE = "\xe2\x98\x92" -- ☒ (U+2612) as UTF-8 bytes
local CHECKBOX_TODO = "\xe2\x98\x90" -- ☐ (U+2610) as UTF-8 bytes

-- Classify an inline as a task-list checkbox.
-- @param inline an inline element (or nil)
-- @return "done" / "todo" when the inline is a Str holding exactly one of the
--         checkbox glyphs, otherwise nil
local function checkbox_state(inline)
  if not inline or inline.t ~= "Str" then
    return nil
  end
  local glyph = inline.text
  if glyph == CHECKBOX_DONE then
    return "done"
  elseif glyph == CHECKBOX_TODO then
    return "todo"
  end
  return nil
end
-- Pandoc filter for BulletList elements.
-- A list with no task items is returned unchanged. Otherwise the whole list
-- is re-emitted as raw HTML <ul>/<li> wrappers around the original blocks,
-- with task items tagged <li data-task="todo|done"> and their checkbox glyph
-- (plus one following Space) removed.
--
-- @param el the BulletList element
-- @return el itself, or a list of replacement blocks
function BulletList(el)
  -- Task state of one list item, judged by the first inline of its first
  -- Plain/Para block; nil when the item is not a task.
  local function item_state(item)
    local lead = item[1]
    if lead and (lead.t == "Plain" or lead.t == "Para") and lead.content[1] then
      return checkbox_state(lead.content[1])
    end
    return nil
  end

  -- Classify every item once; `false` marks a regular item.
  local states = {}
  local found_task = false
  for idx, item in ipairs(el.content) do
    local state = item_state(item) or false
    states[idx] = state
    if state then found_task = true end
  end
  if not found_task then return el end

  local out = {}
  local function push(block)
    out[#out + 1] = block
  end

  push(pandoc.RawBlock("html", "<ul>"))
  for idx, item in ipairs(el.content) do
    local state = states[idx]
    if state then
      push(pandoc.RawBlock("html", '<li data-task="' .. state .. '">'))

      -- Skip the checkbox (index 1) and a Space directly after it (index 2).
      local inlines = item[1].content
      local from = 2
      if inlines[2] and inlines[2].t == "Space" then
        from = 3
      end
      local kept = {}
      for k = from, #inlines do
        kept[#kept + 1] = inlines[k]
      end
      if #kept > 0 then
        push(pandoc.Plain(kept))
      end

      -- Blocks after the first one belong to the same item.
      for k = 2, #item do
        push(item[k])
      end
      push(pandoc.RawBlock("html", "</li>"))
    else
      -- Regular item: wrap its blocks as they are.
      push(pandoc.RawBlock("html", "<li>"))
      for _, block in ipairs(item) do
        push(block)
      end
      push(pandoc.RawBlock("html", "</li>"))
    end
  end
  push(pandoc.RawBlock("html", "</ul>"))
  return out
end
-- ── Wikilinks ─────────────────────────────────────────────────────────────────
-- [[target|label]] or [[target]] → <a data-wikilink href="#slug">label</a>
--
-- Pandoc 3.x tokenizes at whitespace, so [[target|label with spaces]] arrives as
-- multiple Str/Space tokens. process_wikilinks coalesces adjacent Str+Space runs
-- into a combined string before scanning for [[...]] patterns, then reconstructs
-- the inline list. Non-Str/Space inlines (Bold, Emph, Code, etc.) pass through.
-- Turn a wikilink target into a fragment slug: lower-case the text, collapse
-- each run of whitespace/underscores into a single "-", then drop every
-- character that is neither %w nor "-".
-- NOTE(review): %w is byte-based, so non-ASCII letters are presumably
-- stripped ("Café" -> "caf") — confirm this is acceptable for targets.
--
-- @param text string to slugify
-- @return string the slug (exactly one value)
local function slugify(text)
  -- string.gsub returns (result, count); assigning to a local keeps only the
  -- string so the substitution count does not leak out as a second return.
  local slug = text:lower():gsub("[%s_]+", "-"):gsub("[^%w%-]", "")
  return slug
end
-- Scan a coalesced Str+Space run for [[target]] / [[target|label]] patterns.
--
-- Plain text around the wikilinks is re-tokenized into Str and Space inlines
-- (one Space per " ") instead of being emitted as a single Str with embedded
-- spaces. BlockQuote and BulletList in this file test the FIRST inline of a
-- paragraph for an exact "[!NOTE]" / checkbox Str; pandoc filters child
-- blocks before their parents, so a merged Str such as "[!NOTE] see " would
-- make those checks fail whenever the paragraph also contains a wikilink.
--
-- @param combined string: text of the run, each Space rendered as " "
-- @return table   list of inlines (Str, Space, RawInline html anchors)
-- @return boolean true when at least one wikilink was rewritten
local function scan_for_wikilinks(combined)
  local parts = {}
  local changed = false

  -- Append `text` to parts as alternating Str / Space tokens.
  local function emit_text(text)
    local i = 1
    while i <= #text do
      if text:sub(i, i) == " " then
        parts[#parts + 1] = pandoc.Space()
        i = i + 1
      else
        local stop = text:find(" ", i, true) or (#text + 1)
        parts[#parts + 1] = pandoc.Str(text:sub(i, stop - 1))
        i = stop
      end
    end
  end

  -- The label is spliced into raw HTML, so markup-significant characters
  -- must be escaped ("[[R&D]]", "[[a|x < y]]").
  local html_entities = { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" }
  local function escape_html(text)
    return (text:gsub("[&<>]", html_entities))
  end

  local pos = 1
  while pos <= #combined do
    local open_s, open_e = combined:find("[[", pos, true)
    local close_s, close_e
    if open_s then
      close_s, close_e = combined:find("]]", open_e + 1, true)
    end
    if not close_s then
      -- No (further) complete wikilink. An unterminated "[[" means no later
      -- "[[" can be closed either, so the remainder is literal text.
      emit_text(combined:sub(pos))
      break
    end

    emit_text(combined:sub(pos, open_s - 1))

    local inner = combined:sub(open_e + 1, close_s - 1)
    local target, label = inner:match("^([^|]+)|(.+)$")
    if not target then
      -- No "target|label" split: the inner text serves as both.
      target, label = inner, inner
    end
    target = target:match("^%s*(.-)%s*$")
    label = label:match("^%s*(.-)%s*$")

    parts[#parts + 1] = pandoc.RawInline("html",
      '<a data-wikilink href="#' .. slugify(target) .. '">'
        .. escape_html(label) .. '</a>')
    pos = close_e + 1
    changed = true
  end

  return parts, changed
end
-- Rewrite wikilinks inside a list of inlines.
-- Each maximal run of Str/Space inlines is joined into one string (Space
-- becomes " ") and, when it contains "[[", passed to scan_for_wikilinks.
-- Runs in which nothing was rewritten, and every other kind of inline, are
-- copied through as the original tokens.
--
-- @param inlines list of inline elements
-- @return a new list of inlines when at least one wikilink was rewritten,
--         otherwise nil
local function process_wikilinks(inlines)
  local function is_text(node)
    return node.t == "Str" or node.t == "Space"
  end

  local out = {}
  local rewrote_any = false
  local total = #inlines
  local idx = 1

  while idx <= total do
    local node = inlines[idx]
    if not is_text(node) then
      -- Emph, Strong, Code, RawInline, ... pass straight through.
      out[#out + 1] = node
      idx = idx + 1
    else
      -- Gather the contiguous text run starting at idx.
      local run_start = idx
      local pieces = {}
      repeat
        local tok = inlines[idx]
        if tok.t == "Str" then
          pieces[#pieces + 1] = tok.text
        else
          pieces[#pieces + 1] = " "
        end
        idx = idx + 1
      until idx > total or not is_text(inlines[idx])
      local text = table.concat(pieces)

      local replacement, hit
      if text:find("%[%[") then
        replacement, hit = scan_for_wikilinks(text)
      end

      if hit then
        for _, piece in ipairs(replacement) do
          out[#out + 1] = piece
        end
        rewrote_any = true
      else
        -- Nothing resolved in this run: keep the original tokens.
        for k = run_start, idx - 1 do
          out[#out + 1] = inlines[k]
        end
      end
    end
  end

  if rewrote_any then
    return out
  end
  return nil
end
-- Pandoc filter for Para: rebuild the paragraph only when process_wikilinks
-- reports a rewrite; otherwise return nothing.
function Para(el)
  local rewritten = process_wikilinks(el.content)
  if not rewritten then
    return
  end
  return pandoc.Para(rewritten)
end
-- Pandoc filter for Plain: rebuild the block only when process_wikilinks
-- reports a rewrite; otherwise return nothing.
function Plain(el)
  local rewritten = process_wikilinks(el.content)
  if not rewritten then
    return
  end
  return pandoc.Plain(rewritten)
end
-- Pandoc filter for Header: when process_wikilinks reports a rewrite, rebuild
-- the header with the same level and attributes; otherwise return nothing.
function Header(el)
  local rewritten = process_wikilinks(el.content)
  if not rewritten then
    return
  end
  return pandoc.Header(el.level, rewritten, el.attr)
end