From 76d7f2e67b1a271545898a9caeb2f67b31cfd131 Mon Sep 17 00:00:00 2001 From: Guilian Date: Sat, 18 Jan 2025 18:52:52 +0100 Subject: [PATCH] fix: proper printing of document --- html.lua | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/html.lua b/html.lua index 4c68f33..578020e 100644 --- a/html.lua +++ b/html.lua @@ -523,23 +523,40 @@ function M.clean_text_nodes(node) end -function M.print_document(node, indent) +function M.tostring(node, indent, include_internal_pseudoelements) -- Default indentation is 0 (root level) indent = indent or 0 + include_internal_pseudoelements = include_internal_pseudoelements or false + + local is_pseudo_element = (node.tag_name or ":root"):sub(1,1) == ":" + local indent_level_str = " " -- Create the indentation string (e.g., " " for each level) local indent_str = string.rep(indent_level_str, indent) if node.tag_name == ":text" then - print(indent_str .. "<:text>\n" .. node.content .. "\n" .. indent_str .. "") - return + local str = "" + + if include_internal_pseudoelements then + str = str .. "<:text>" + end + + str = str .. node.content + + if include_internal_pseudoelements then + str = str .. "" + end + + return str end local node_name = "" + if not is_pseudo_element or include_internal_pseudoelements then -- Print the current node's tag name - node_name = node_name .. indent_str .. "<" .. (node.tag_name or ":root") + node_name = node_name .. "\n" .. indent_str .. "<" .. (node.tag_name or ":root") + end -- Print attributes if any if next(node.attributes) ~= nil then @@ -557,23 +574,40 @@ function M.print_document(node, indent) end end - node_name = node_name .. ">" + if not is_pseudo_element or include_internal_pseudoelements then + node_name = node_name .. ">" + end - print( node_name ) + --print( node_name ) + local next_indent = indent + 1 + if is_pseudo_element and not include_internal_pseudoelements then + next_indent = indent + end -- Recursively print children for _, child in ipairs(node.children) do - M.print_document(child, indent + 1) + node_name = node_name .. M.tostring(child, next_indent, include_internal_pseudoelements) end - -- Print the closing tag - print(indent_str .. "") + if not VOID_TAGS[node.tag_name] and ( not is_pseudo_element or include_internal_pseudoelements ) then + -- Print the closing tag + local end_indent = "" + local closing_text_tag = "" + if node_name:sub(#node_name, #node_name) == ">" and node_name:sub(#node_name - #closing_text_tag + 1, #node_name) ~= closing_text_tag then + end_indent = "\n" .. indent_str + end + node_name = node_name .. end_indent .. "" + end + + return node_name end + + function M.parse( html_string ) local clean_html = M.preprocess( html_string )