-- asw.lua — Pandoc Lua filter for Agentic Semantic Web (ASW)
-- Converts GFM markdown constructs to ASW data-attribute HTML
--
-- Usage:
--   pandoc input.md --from gfm --lua-filter asw.lua --template asw.html5 -o output.html
--
-- Transforms:
--   - [ ] item         → task list item in the "todo" state
--   - [x] item         → task list item in the "done" state
--   > [!NOTE]          → "note" callout
--   > [!WARNING]       → "warning" callout
--   > [!TIP]           → "tip" callout
--   > [!ERROR]         → "error" callout
--   [[target]]         → wikilink labelled "target"
--   [[target|label]]   → wikilink labelled "label"

-- ── Callouts ─────────────────────────────────────────────────────────────────
-- GitHub callout syntax: > [!TYPE]\n> content → callout block wrapping the content
    -- Callout handling. CALLOUT_MAP maps the upper-case marker found in
    -- "[!MARKER]" to a callout type; CALLOUT_LABEL gives the title text used
    -- for each callout type.
    --
    -- BlockQuote(el): returns el untouched unless its first block is a
    -- Para/Plain whose first inline is a Str of the exact form "[!MARKER]"
    -- with MARKER a key of CALLOUT_MAP. Otherwise it returns a list of blocks:
    -- two raw HTML blocks (the second built around the label), the rest of the
    -- first paragraph (the marker and one following SoftBreak/Space dropped,
    -- always re-emitted as a Para, and omitted when nothing is left), the
    -- remaining blocks of the quote, and a final raw HTML block.
    --
    -- NOTE(review): this block appears to have lost its line breaks, and the
    -- three raw HTML string literals contain only whitespace and run across
    -- several lines — the callout wrapper markup looks to be missing. As the
    -- code stands, callout_type is only used to look up the label. Restore the
    -- literals from the original file before relying on this function.
    local CALLOUT_MAP = { NOTE = "note", INFO = "note", IMPORTANT = "note", WARNING = "warning", CAUTION = "warning", TIP = "tip", ERROR = "error", DANGER = "error", } local CALLOUT_LABEL = { note = "Note", warning = "Warning", tip = "Tip", error = "Error", } function BlockQuote(el) local first = el.content[1] if not first or (first.t ~= "Para" and first.t ~= "Plain") then return el end -- The first inline should be a Str like "[!NOTE]" local first_inline = first.content and first.content[1] if not first_inline or first_inline.t ~= "Str" then return el end local marker = first_inline.text:match("^%[!(%u+)%]$") if not marker then return el end local callout_type = CALLOUT_MAP[marker] if not callout_type then return el end local label = CALLOUT_LABEL[callout_type] or marker:sub(1,1) .. marker:sub(2):lower() local blocks = { pandoc.RawBlock("html", '
    '), pandoc.RawBlock("html", '

    ' .. label .. '

    '), } -- First para: strip the [!MARKER] token and optional following SoftBreak/Space local inlines = first.content local start_i = 2 if inlines[start_i] and (inlines[start_i].t == "SoftBreak" or inlines[start_i].t == "Space") then start_i = start_i + 1 end local rest_inlines = {} for i = start_i, #inlines do table.insert(rest_inlines, inlines[i]) end if #rest_inlines > 0 then table.insert(blocks, pandoc.Para(rest_inlines)) end -- Remaining blocks in the blockquote for i = 2, #el.content do table.insert(blocks, el.content[i]) end table.insert(blocks, pandoc.RawBlock("html", '
    ')) return blocks end -- ── Task Lists ─────────────────────────────────────────────────────────────── -- GFM: - [ ] →
  • , - [x] →
  -- Task-list and slug helpers.
  --
  -- checkbox_state(inline): returns "done" when inline is a Str equal to
  -- CHECKBOX_DONE (☒), "todo" when it equals CHECKBOX_TODO (☐), and nil for
  -- anything else (including a nil inline).
  --
  -- BulletList(el): returns el unchanged when no item begins with a checkbox,
  -- i.e. no item whose first block is a Plain/Para with a first inline that
  -- has a checkbox state.
  -- NOTE(review): the rebuild step is cut off in the middle of the `parts`
  -- table constructor — the per-item loop is not present and the statement is
  -- unbalanced. Restore it from the original file.
  --
  -- slugify(text): lower-cases text, turns each run of whitespace/underscores
  -- into "-", then removes every character that is not alphanumeric or "-".
  -- NOTE(review): the return statement ends in a gsub call, so the
  -- substitution count is returned as a second value; wrap the expression in
  -- parentheses if callers should receive only the string.
  • -- Pandoc (GFM mode) emits checkboxes as RawInline html: -- Pandoc 3.x encodes GFM task list checkboxes as Unicode Str: -- ☒ (U+2612) = checked/done, ☐ (U+2610) = unchecked/todo local CHECKBOX_DONE = "\xe2\x98\x92" -- ☒ UTF-8 local CHECKBOX_TODO = "\xe2\x98\x90" -- ☐ UTF-8 local function checkbox_state(inline) if inline and inline.t == "Str" then if inline.text == CHECKBOX_DONE then return "done" end if inline.text == CHECKBOX_TODO then return "todo" end end return nil end function BulletList(el) -- First pass: check if any item is a task local has_tasks = false for _, item in ipairs(el.content) do local first = item[1] if first and (first.t == "Plain" or first.t == "Para") and first.content[1] then if checkbox_state(first.content[1]) then has_tasks = true break end end end if not has_tasks then return el end -- Rebuild as explicit HTML with data-task attributes local parts = { pandoc.RawBlock("html", "")) return parts end -- ── Wikilinks ───────────────────────────────────────────────────────────────── -- [[target|label]] or [[target]] → label -- -- Pandoc 3.x tokenizes at whitespace, so [[target|label with spaces]] arrives as -- multiple Str/Space tokens. process_wikilinks coalesces adjacent Str+Space runs -- into a combined string before scanning for [[...]] patterns, then reconstructs -- the inline list. Non-Str/Space inlines (Bold, Emph, Code, etc.) pass through. local function slugify(text) return text:lower():gsub("[%s_]+", "-"):gsub("[^%w%-]", "") end -- Scan combined for [[...]] patterns and emit inlines. -- Returns a list of inlines with wikilinks replaced, plus a changed flag. 
-- Escape the characters that are significant in HTML text so that label text
-- taken from the document can be placed inside a raw HTML inline.
--   text: plain string
--   returns: the string with &, < and > replaced by entities
local function escape_wikilink_text(text)
  -- gsub also returns a match count; the local keeps only the string.
  local escaped = text:gsub("[&<>]", {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
  })
  return escaped
end

-- Strip leading and trailing whitespace.
local function trim(text)
  return text:match("^%s*(.-)%s*$")
end

-- Append plain text to `out` as Str/Space inlines, so that text surrounding a
-- wikilink keeps the same token shape the reader produced (a Str per word, a
-- Space per gap) instead of one Str with embedded spaces.
local function append_text(out, text)
  for spaces, word in text:gmatch("( *)([^ ]*)") do
    if spaces ~= "" then
      out[#out + 1] = pandoc.Space()
    end
    if word ~= "" then
      out[#out + 1] = pandoc.Str(word)
    end
  end
end

-- Scan `combined` for [[target]] / [[target|label]] patterns.
--   combined: text of one contiguous Str/Space run, Spaces rendered as " "
--   returns: (1) list of inlines in which every resolved wikilink is a raw
--            HTML inline and all other text is Str/Space tokens,
--            (2) true when at least one wikilink was resolved
local function scan_for_wikilinks(combined)
  local parts = {}
  local changed = false
  local pos = 1
  local len = #combined

  while pos <= len do
    -- Plain (non-pattern) search: the brackets are literal text.
    local open_s, open_e = combined:find("[[", pos, true)
    if not open_s then
      append_text(parts, combined:sub(pos))
      break
    end
    if open_s > pos then
      append_text(parts, combined:sub(pos, open_s - 1))
    end

    local close_s, close_e = combined:find("]]", open_e + 1, true)
    if not close_s then
      -- No "]]" anywhere after this point, so nothing further can resolve:
      -- keep the remainder as literal text.
      append_text(parts, combined:sub(open_s))
      break
    end

    local inner = combined:sub(open_e + 1, close_s - 1)
    local target, label = inner:match("^([^|]+)|(.+)$")
    if not target then
      target, label = inner, inner
    end
    target = trim(target)
    label = trim(label)
    if label == "" then
      label = target
    end

    if label == "" then
      -- "[[]]" (or whitespace only) is not a link; leave it as written.
      append_text(parts, combined:sub(open_s, close_e))
    else
      -- NOTE(review): only the (escaped) label is emitted; `target` is parsed
      -- but does not appear in the output as the code stands. Confirm whether
      -- link markup around the label is missing from this call.
      parts[#parts + 1] = pandoc.RawInline("html", escape_wikilink_text(label))
      changed = true
    end
    pos = close_e + 1
  end

  return parts, changed
end

-- True for the inline kinds that are merged into a text run.
local function is_text_token(inline)
  return inline.t == "Str" or inline.t == "Space"
end

-- Replace wikilinks in a list of inlines.
-- Contiguous Str/Space runs are joined into one string so that a wikilink
-- whose label contains spaces (and therefore spans several tokens) can be
-- matched. Any other inline ends the current run and is passed through as is.
--   inlines: list of inline elements
--   returns: the new list of inlines, or nil when no wikilink was resolved
local function process_wikilinks(inlines)
  local result = {}
  local changed = false
  local i, n = 1, #inlines

  while i <= n do
    local el = inlines[i]
    if is_text_token(el) then
      -- Collect the run [run_start, i - 1] and its text.
      local run_start = i
      local pieces = {}
      while i <= n and is_text_token(inlines[i]) do
        local tok = inlines[i]
        if tok.t == "Str" then
          pieces[#pieces + 1] = tok.text
        else
          pieces[#pieces + 1] = " "
        end
        i = i + 1
      end
      local combined = table.concat(pieces)

      local parts, any_changed
      if combined:find("[[", 1, true) then
        parts, any_changed = scan_for_wikilinks(combined)
      end

      if any_changed then
        for _, part in ipairs(parts) do
          result[#result + 1] = part
        end
        changed = true
      else
        -- Nothing resolved in this run: emit the original tokens unchanged.
        for j = run_start, i - 1 do
          result[#result + 1] = inlines[j]
        end
      end
    else
      result[#result + 1] = el
      i = i + 1
    end
  end

  if changed then
    return result
  end
  return nil
end

-- Pandoc filter entry points. Each returns a replacement element only when a
-- wikilink was resolved; returning nothing leaves the element as it is.

function Para(el)
  local r = process_wikilinks(el.content)
  if r then
    return pandoc.Para(r)
  end
end

function Plain(el)
  local r = process_wikilinks(el.content)
  if r then
    return pandoc.Plain(r)
  end
end

function Header(el)
  local r = process_wikilinks(el.content)
  if r then
    return pandoc.Header(el.level, r, el.attr)
  end
end