From 7ccd068c90e683587dec6954bcd03dc82721cb1e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel
Date: Sun, 13 Oct 2024 17:53:27 +0200
Subject: [PATCH] Allow to check for mis-typed attributes and accidental AST changes

Setting the `QUARTO_JOG_CHECK` environment variable will run checks to
identify element attributes that have the wrong type, and will also find
filters that modify the input object, but don't return it. Both of these
can cause issues with jog.
---
 src/resources/filters/ast/runemulation.lua    | 103 ++++-
 src/resources/filters/main.lua                |   4 +
 src/resources/filters/modules/attribcheck.lua | 430 ++++++++++++++++++
 src/resources/filters/modules/import_all.lua  |   1 +
 4 files changed, 536 insertions(+), 2 deletions(-)
 create mode 100644 src/resources/filters/modules/attribcheck.lua

diff --git a/src/resources/filters/ast/runemulation.lua b/src/resources/filters/ast/runemulation.lua
index 39b193a8f36..82fa02511a1 100644
--- a/src/resources/filters/ast/runemulation.lua
+++ b/src/resources/filters/ast/runemulation.lua
@@ -47,8 +47,97 @@ local function remove_vault(doc)
   end
 end
 
+--- Create a deep copy of a table.
+local function copy_table (tbl, depth, seen)
+  local tp = type(tbl)
+  if tp == 'table' then
+    local copy = {}
+    -- Iterate 'raw' pairs, i.e., without using metamethods
+    for key, value in next, tbl, nil do
+      if depth == 'shallow' then
+        copy[key] = value
+      else
+        copy[copy_table(key)] = copy_table(value)
+      end
+    end
+    return setmetatable(copy, getmetatable(tbl))
+  elseif tp == 'userdata' then
+    return tbl:clone()
+  else -- number, string, boolean, etc
+    return tbl
+  end
+end
+
+--- Checks if two tables are equal
+local function equals(o1, o2)
+  if o1 == o2 then
+    return true
+  end
+  local o1type = type(o1)
+  local o2type = type(o2)
+  if o1type ~= o2type or o1type ~= 'table' then
+    return false
+  end
+
+  local keys = {}
+
+  for key1, value1 in pairs(o1) do
+    local value2 = o2[key1]
+    if value2 == nil or equals(value1, value2) == false then
+      return false
+    end
+    keys[key1] = true
+  end
+
+  for key2 in pairs(o2) do
+    if not keys[key2] then return false end
+  end
+  return true
+end
+
+--- Checks if a filter follows the "nondestructive" property.
+-- The nondestructive property is fulfilled if a filter function returns
+-- an explicit object, or if it returns `nil` while leaving the passed
+-- in object unmodified.
+--
+-- A warning is issued if the property is violated.
+--
+-- Only filters with this property can use jog safely, without
+-- unintended consequences.
+local function check_nondestructive_property (namedfilter)
+  for name, fn in pairs(namedfilter.filter) do
+    if type(fn) == 'function' then
+      local copy = function (x)
+        local tp = type(x)
+        return tp ~= 'table' and x:clone() or
+          (pandoc.utils.type(x) == 'Meta' and pandoc.Meta(x) or copy_table(x))
+      end
+      namedfilter.filter[name] = function (obj, context)
+        local orig = copy(obj)
+        local result, descend = fn(obj, context)
+        if result == nil then
+          if type(obj) ~= 'table' and not equals(obj, orig) then
+            warn(
+              "\nFunction '" .. name .. "' in filter '" .. namedfilter.name ..
+              "' returned `nil`, but modified the input."
+            )
+          end
+        -- elseif result.t == obj.t and not rawequal(result, obj) then
+        --   warn(
+        --     "\nFunction '" .. name .. "' in filter '" .. namedfilter.name ..
+        --     "' returned a new object instead of passing the original one through."
+        --   )
+        end
+        return result, descend
+      end
+    end
+  end
+  return namedfilter
+end
+
 local function run_emulated_filter_chain(doc, filters, afterFilterPass, profiling)
   init_trace(doc)
+  local compare_jog_and_walk = os.getenv 'QUARTO_JOG_CHECK'
   for i, v in ipairs(filters) do
     local function callback()
       if v.flags then
@@ -79,7 +168,17 @@ local function run_emulated_filter_chain(doc, filters, afterFilterPass, profilin
         print(pandoc.write(doc, "native"))
       else
         _quarto.ast._current_doc = doc
-        doc = run_emulated_filter(doc, v.filter)
+
+        if compare_jog_and_walk and not v.force_pandoc_walk then
+          v = check_nondestructive_property(v)
+        end
+        doc = run_emulated_filter(doc, v.filter, v.force_pandoc_walk)
+
+        if compare_jog_and_walk and not v.force_pandoc_walk then
+          -- Types of meta values are only checked on assignment.
+          doc.meta = doc.meta
+        end
+
         ensure_vault(doc)
 
         add_trace(doc, v.name)
@@ -204,4 +303,4 @@ function run_as_extended_ast(specTable)
   end
 
   return pandocFilterList
-end
\ No newline at end of file
+end
diff --git a/src/resources/filters/main.lua b/src/resources/filters/main.lua
index 3aefe5505d0..d23bbfcb8f8 100644
--- a/src/resources/filters/main.lua
+++ b/src/resources/filters/main.lua
@@ -196,6 +196,10 @@ import("./quarto-init/metainit.lua")
 -- [/import]
 
 
+if os.getenv 'QUARTO_JOG_CHECK' then
+  _quarto.modules.attribcheck.enable_attribute_checks()
+end
+
 initCrossrefIndex()
 
 initShortcodeHandlers()
diff --git a/src/resources/filters/modules/attribcheck.lua b/src/resources/filters/modules/attribcheck.lua
new file mode 100644
index 00000000000..3b5c2470727
--- /dev/null
+++ b/src/resources/filters/modules/attribcheck.lua
@@ -0,0 +1,430 @@
+local io = require 'io'
+local pandoc = require 'pandoc'
+local utils = require 'pandoc.utils'
+local ptype = utils.type
+
+local InlinesMT = debug.getmetatable(pandoc.Inlines{})
+local BlocksMT = debug.getmetatable(pandoc.Blocks{})
+
+--- Expected types of the settable fields of each Inline element, keyed
+-- by element tag and then by field name.
+local InlineTypes = {
+  Cite = {
+    citation = 'List',
+    content = 'Inlines',
+  },
+  Code = {
+    attr = 'Attr',
+    text = 'string',
+  },
+  Emph = {
+    content = 'Inlines',
+  },
+  Image = {
+    attr = 'Attr',
+    caption = 'Inlines',
+    src = 'string',
+    title = 'string',
+  },
+  LineBreak = {
+  },
+  Link = {
+    attr = 'Attr',
+    content = 'Inlines',
+    target = 'string',
+    title = 'string',
+  },
+  Math = {
+    mathtype = 'string',
+    text = 'string',
+  },
+  Note = {
+    content = 'Blocks',
+  },
+  Quoted = {
+    content = 'Inlines',
+    quotetype = 'string',
+  },
+  RawInline = {
+    format = 'string',
+    text = 'string',
+  },
+  SmallCaps = {
+    content = 'Inlines',
+  },
+  SoftBreak = {
+  },
+  Space = {
+  },
+  Str = {
+    text = 'string',
+  },
+  Span = {
+    attr = 'Attr',
+    content = 'Inlines',
+  },
+  Strikeout = {
+    content = 'Inlines',
+  },
+  Strong = {
+    content = 'Inlines',
+  },
+  Subscript = {
+    content = 'Inlines',
+  },
+  Superscript = {
+    content = 'Inlines',
+  },
+  Underline = {
+    content = 'Inlines',
+  },
+}
+
+--- Expected types of the settable fields of each Block element, keyed
+-- by element tag and then by field name. A `{sequence = T}` entry
+-- stands for a list whose items are each checked as type `T`.
+local BlockTypes = {
+  BlockQuote = {
+    content = 'Blocks',
+  },
+  BulletList = {
+    content = {sequence = 'Blocks'}
+  },
+  CodeBlock = {
+    attr = 'Attr',
+    text = 'string',
+  },
+  DefinitionList = {
+    content = {sequence = 'DefinitionItem'},
+  },
+  Div = {
+    attr = 'Attr',
+    content = 'Blocks',
+  },
+  Figure = {
+    attr = 'Attr',
+    caption = 'Caption',
+    content = 'Blocks',
+  },
+  Header = {
+    attr = 'Attr',
+    content = 'Inlines',
+    level = 'integer',
+  },
+  HorizontalRule = {
+  },
+  LineBlock = {
+    content = {sequence = 'Inlines'},
+  },
+  OrderedList = {
+    content = {sequence = 'Blocks'},
+  },
+  Para = {
+    content = 'Inlines',
+  },
+  Plain = {
+    content = 'Inlines',
+  },
+  RawBlock = {
+    format = 'string',
+    text = 'string',
+  },
+  Table = {
+    attr = 'Attr',
+    caption = 'Caption',
+    colspecs = {sequence = 'ColSpec'},
+    bodies = {sequence = 'TableBody'},
+    head = 'TableHead',
+    foot = 'TableFoot',
+  },
+}
+
+--- Emit a warning that a value of type `actual` was used where
+-- `expected` was required. The reported source location is taken from
+-- stack level `5 + shift` (`shift` defaults to 0); `extrainfo`, if
+-- given, is appended to the message on a new line.
+local function warn_conversion (expected, actual, shift, extrainfo)
+  local dbginfo = debug.getinfo(5 + (shift or 0), 'Sln')
+  local dbginfostr = 'no detailed debug info available'
+  if dbginfo then
+    dbginfostr = (dbginfo.name or '') .. ' in ' .. dbginfo.source
+      .. ':' .. dbginfo.currentline
+  end
+  warn(actual .. ' instead of ' .. expected .. ': ' .. dbginfostr
+    .. (extrainfo and '\n' .. extrainfo or ''))
+end
+
+-- Fall back to building a plain table when `pandoc.Caption` is not
+-- available.
+local makeCaption = pandoc.Caption
+if not makeCaption then
+  makeCaption = function (long, short)
+    return {
+      long = long,
+      short = short
+    }
+  end
+end
+
+--- Type checkers keyed by type name. Each takes a value and returns the
+-- value to store in its place; most of them warn when the value does
+-- not already have the expected type.
+local ensure_type
+ensure_type = {
+  Attr = function (obj)
+    local pt = ptype(obj)
+    return pt == 'Attr'
+      and obj
+      or pandoc.Attr(obj)
+  end,
+
+  Blocks = function (obj, shift, extrainfo)
+    shift = shift or 0
+    local pt = ptype(obj)
+    if pt == 'Blocks' then
+      return obj
+    elseif pt == 'List' or pt == 'table' then
+      warn_conversion('Blocks', pt, shift, tostring(obj))
+      return setmetatable(obj, BlocksMT)
+    elseif pt == 'Inline' then
+      warn_conversion('Blocks', pt, shift, tostring(obj))
+      return setmetatable({pandoc.Plain{obj}}, BlocksMT)
+    elseif pt == 'Inlines' then
+      warn_conversion('Blocks', pt, shift, tostring(obj))
+      return setmetatable({pandoc.Plain(obj)}, BlocksMT)
+    else
+      warn_conversion('Blocks', pt, shift, tostring(obj))
+      return pandoc.Blocks(obj)
+    end
+  end,
+
+  Caption = function (obj, shift, extrainfo)
+    shift = shift or 0
+    local tp = ptype(obj)
+    if tp == 'Caption' then
+      return obj
+    elseif tp == 'table' and obj.long then
+      if pandoc.Caption then warn_conversion('Caption', tp) end
+      local long = ensure_type['Blocks'](obj.long, shift + 1, extrainfo)
+      local short
+      if obj.short then
+        short = ensure_type['Inlines'](obj.short, shift + 1, extrainfo)
+      end
+      return makeCaption(long, short)
+    else
+      if pandoc.Caption then warn_conversion('Caption', tp) end
+      local blocks = ensure_type['Blocks'](obj, shift + 1, extrainfo)
+      if pandoc.Caption then
+        return pandoc.Caption(blocks)
+      else
+        for i = 1, #obj do
+          obj[i] = nil
+        end
+        obj.long = blocks
+        return obj
+      end
+    end
+  end,
+
+  DefinitionItem = function (obj, shift)
+    shift = shift or 0
+    obj[1] = ensure_type['Inlines'](obj[1], shift + 1)
+    obj[2] = ensure_type[{ sequence = 'Blocks'}](obj[2], shift + 1)
+    return obj
+  end,
+
+  Inlines = function (obj, shift, extrainfo)
+    shift = shift or 0
+    local pt = ptype(obj)
+    if pt == 'Inlines' then
+      return obj
+    elseif pt == 'List' or pt == 'table' then
+      warn_conversion('Inlines', pt, shift, extrainfo)
+      return setmetatable(obj, InlinesMT)
+    else
+      warn_conversion('Inlines', pt, shift, extrainfo)
+      return pandoc.Inlines(obj)
+    end
+  end,
+
+  Meta = function (obj, shift)
+    if ptype(obj) ~= 'Meta' then
+      warn_conversion('Meta', ptype(obj), shift)
+      return pandoc.Meta(obj)
+    end
+
+    local function all_of_type (tbl, typename)
+      for _, value in ipairs(tbl) do
+        if ptype(value) ~= typename then
+          return false
+        end
+      end
+      return true
+    end
+
+    local function ensure_meta(mv, depth, curfield)
+      local tmv = ptype(mv)
+
+      if pandoc.List{'Inlines', 'Blocks', 'string', 'boolean'}:includes(tmv) then
+        return mv
+      elseif tmv == 'Block' then
+        warn_conversion('Blocks', tmv, depth, curfield)
+        return pandoc.Blocks(mv)
+      elseif tmv == 'Inline' then
+        warn_conversion('Inlines', tmv, depth, curfield)
+        return pandoc.Inlines(mv)
+      elseif tmv == 'List' or (tmv == 'table' and #mv > 0) then
+        if #mv == 0 then
+          return mv
+        elseif all_of_type(mv, 'Block') then
+          return ensure_type['Blocks'](mv, depth, curfield)
+        elseif all_of_type(mv, 'Inline') then
+          return ensure_type['Inlines'](mv, depth, curfield)
+        else
+          return pandoc.List.map(
+            mv, function(m) return ensure_meta(m, depth+1, curfield) end)
+        end
+      elseif tmv == 'table' or tmv == 'Meta' then
+        for key, v in pairs(mv) do
+          mv[key] = ensure_meta(v, depth + 1, key)
+        end
+        return mv
+      elseif tmv == 'number' then
+        warn_conversion('metavalue', tmv, depth, curfield)
+        return tostring(mv)
+      end
+
+      warn_conversion('metavalue', tmv, depth, curfield)
+      return nil
+    end
+
+    return ensure_meta(obj, shift or 1)
+  end,
+
+  TableBody = function (obj, shift)
+    shift = shift or 0
+    local pt = ptype(obj)
+    if pt ~= 'table' then
+      warn_conversion('table (TableBody)', pt, shift + 1)
+    end
+    return obj
+  end,
+
+  TableFoot = function (obj)
+    local pt = ptype(obj)
+    if pt ~= 'pandoc TableFoot' and pt ~= 'TableFoot' then
+      error('Cannot auto-convert to TableFoot, got ' .. pt)
+    end
+    return obj
+  end,
+
+  TableHead = function (obj)
+    local pt = ptype(obj)
+    if pt ~= 'pandoc TableHead' and pt ~= 'TableHead' then
+      error('Cannot auto-convert to TableHead, got ' .. pt)
+    end
+    return obj
+  end,
+
+  integer = function (obj)
+    if type(obj) ~= 'number' or math.floor(obj) ~= obj then
+      warn_conversion('integer', type(obj))
+      return math.floor(tonumber(obj))
+    end
+    return obj
+  end,
+
+  string = function (obj)
+    if type(obj) ~= 'string' then
+      warn_conversion('string', type(obj))
+      return tostring(obj)
+    end
+    return obj
+  end,
+}
+-- A `{sequence = T}` key yields a function that maps the checker for
+-- `T` over a table or List (and returns nil for anything else); any
+-- other unknown key yields a function that warns and returns its input.
+local ensure_type_metatable = {
+  __index = function (tbl, key)
+    if type(key) == 'table' then
+      if key.sequence then
+        return function (obj)
+          local pt = ptype(obj)
+          if pt == 'table' or pt == 'List' then
+            return pandoc.List.map(obj, tbl[key.sequence])
+          end
+        end
+      end
+    end
+    return function (obj)
+      warn_conversion(key, ptype(obj))
+      return obj
+    end
+  end
+}
+setmetatable(ensure_type, ensure_type_metatable)
+
+local CaptionMT = pandoc.Caption and debug.getmetatable(pandoc.Caption{})
+local CellMT = debug.getmetatable(pandoc.Cell{})
+local InlineMT = debug.getmetatable(pandoc.Space())
+local BlockMT = debug.getmetatable(pandoc.HorizontalRule())
+local PandocMT = debug.getmetatable(pandoc.Pandoc{})
+local default_setter = PandocMT.setters.blocks
+
+--- Replace the field setters in the Inline, Block, Caption (if present),
+-- Cell and Pandoc metatables with wrappers that run `ensure_type` first.
+local function enable_attribute_checks()
+  for fieldname, setter in pairs(InlineMT.setters) do
+    InlineMT.setters[fieldname] = function (obj, key, value)
+      local expected_type = InlineTypes[obj.tag][key]
+      value = expected_type
+        and ensure_type[expected_type](value, 0)
+        or value
+      setter(obj, key, value)
+    end
+  end
+  for fieldname, setter in pairs(BlockMT.setters) do
+    BlockMT.setters[fieldname] = function (obj, key, value)
+      local expected_type = BlockTypes[obj.tag][fieldname]
+      value = expected_type
+        and ensure_type[expected_type](value, 0)
+        or value
+      setter(obj, key, value)
+    end
+  end
+
+  -- Caption (currently only in pandoc dev version)
+  if CaptionMT then
+    CaptionMT.setters.short = function (obj, key, value)
+      if value ~= nil then
+        default_setter(obj, key, ensure_type['Inlines'](value))
+      end
+    end
+    CaptionMT.setters.long = function (obj, key, value)
+      default_setter(obj, key, ensure_type['Blocks'](value))
+    end
+  end
+
+  -- Cell
+  CellMT.setters.col_span = function (obj, key, value)
+    default_setter(obj, key, ensure_type['integer'](value))
+  end
+  CellMT.setters.contents = function (obj, key, value)
+    default_setter(obj, key, ensure_type['Blocks'](value))
+  end
+
+  -- Pandoc
+  PandocMT.setters.meta = function (obj, key, value)
+    default_setter(obj, key, ensure_type['Meta'](value))
+  end
+  PandocMT.setters.blocks = function (obj, key, value)
+    default_setter(obj, key, ensure_type['Blocks'](value))
+  end
+end
+
+return {
+  ensure_type_verbose = ensure_type,
+
+  enable_attribute_checks = enable_attribute_checks
+}
diff --git a/src/resources/filters/modules/import_all.lua b/src/resources/filters/modules/import_all.lua
index 1369a183c9a..79355b2c1a6 100644
--- a/src/resources/filters/modules/import_all.lua
+++ b/src/resources/filters/modules/import_all.lua
@@ -3,6 +3,7 @@
 
 _quarto.modules = {
   astshortcode = require("modules/astshortcode"),
+  attribcheck = require("modules/attribcheck"),
   authors = require("modules/authors"),
   brand = require("modules/brand/brand"),
   callouts = require("modules/callouts"),