diff --git a/html.lua b/html.lua
index d1fad0a..d464ead 100644
--- a/html.lua
+++ b/html.lua
@@ -95,6 +95,14 @@ local INLINE_TAGS = {
 	template = true,
 }
 
+--- Collapse every run of whitespace in `str` into a single space.
+-- The call is parenthesised so that only the resulting string is
+-- returned; string.gsub also yields a substitution count.
+-- @param str the string to normalise
+-- @return the normalised string
+local function normalize_whitespace(str)
+	return (str:gsub("%s+", " "))
+end
 
 function M.make_dom_element( tag_name, parent_elem )
 	local o = {
@@ -160,6 +168,102 @@ function M.make_dom_element( tag_name, parent_elem )
 
 			return text
 		end,
+
+		--- Render this element and its descendants as Markdown.
+		-- @param in_pre true when an ancestor is a <pre> element; only
+		--   forwarded to children here. Defaults to false.
+		-- @param root_call true for the outermost call, which also runs the
+		--   final newline clean-up. Defaults to true when omitted.
+		-- @return the Markdown text as a string
+		inner_markdown = function(self, in_pre, root_call)
+			in_pre = in_pre or false
+			-- An explicit false must survive, so only nil gets the default
+			-- (`root_call or true` would always evaluate to true).
+			if root_call == nil then
+				root_call = true
+			end
+
+			if self.tag_name == "script" or self.tag_name == "style" then
+				return ""
+			end
+
+			if self.tag_name == ":text" then
+				return self.content
+			end
+
+			local text = ""
+			local is_list_item = self.tag_name == "li"
+			local parent_is_ul = self.parent and self.parent.tag_name == "ul"
+			local parent_is_ol = self.parent and self.parent.tag_name == "ol"
+
+			local is_heading = self.tag_name:match("^h[1-6]$")
+			local is_pre = self.tag_name == "pre"
+
+			if is_heading then
+				-- The digit after "h" is the number of "#" characters to emit
+				local level = tonumber(self.tag_name:sub(2))
+				text = "\n" .. string.rep("#", level) .. " "
+			end
+
+			if is_list_item then
+				if parent_is_ul then
+					text = "* "
+				elseif parent_is_ol then
+					local position = self:get_child_index()
+					text = position .. ". "
+				end
+			end
+
+			-- Process children. Arguments follow the (in_pre, root_call)
+			-- order of the signature; nested calls are never the root, so
+			-- the clean-up below runs once over the complete output.
+			local parts = {}
+			for _, child in ipairs(self.children) do
+				parts[#parts + 1] = child:inner_markdown(in_pre or is_pre, false)
+			end
+			local inner = table.concat(parts)
+
+			if self.tag_name == "br" then
+				text = text .. "\n" .. inner
+			elseif is_pre then
+				text = text .. inner
+			elseif is_heading then
+				text = text .. normalize_whitespace(inner)
+			elseif self.tag_name == "strong" then
+				text = text .. "**" .. normalize_whitespace(inner) .. "**"
+			elseif self.tag_name == "em" then
+				text = text .. "_" .. normalize_whitespace(inner) .. "_"
+			elseif self.tag_name == "code" then
+				local is_block = self.parent and self.parent.tag_name == "pre"
+				if is_block then
+					text = text .. "\n```\n" .. inner .. "\n```\n"
+				else
+					text = text .. "`" .. normalize_whitespace(inner) .. "`"
+				end
+			elseif self.tag_name == "a" then
+				text = text .. "[" .. normalize_whitespace(inner) .. "]"
+
+				if self.attributes.href then
+					text = text .. "(" .. self.attributes.href .. ")"
+				end
+			else
+				text = text .. inner
+			end
+
+			-- Add newlines after block elements
+			if not INLINE_TAGS[self.tag_name] then
+				text = text .. "\n"
+			end
+
+			if root_call then
+				-- Step 1: Remove whitespace between newlines
+				text = text:gsub("(\n)%s+(\n)", "%1%2")
+				-- Step 2: Replace 3+ consecutive newlines with just two
+				text = text:gsub("\n\n\n+", "\n\n")
+			end
+
+			return text
+		end
 	}
 
 	if parent_elem then
@@ -203,7 +307,7 @@ function M.tokenise( content )
 	local TOKENS = {}
 
 	-- state
-	local in_tag = false
+	local in_tag = nil
 	local currently_opened_quotes = nil
 	local text_memory = ""
 
@@ -213,6 +317,35 @@ function M.tokenise( content )
 
 		local char = content:sub(i,i)
 
+		--
+		-- Taking care of quotes
+		--
+		if in_tag then
+			-- finding matching quotes
+			if currently_opened_quotes ~= nil and char == currently_opened_quotes then
+				currently_opened_quotes = nil
+				text_memory = text_memory .. char
+				goto continue
+			end
+
+			-- Opening a new set of quotes
+			if currently_opened_quotes == nil and (char == "'" or char == '"') then
+				currently_opened_quotes = char
+				text_memory = text_memory .. char
+				goto continue
+			end
+
+			-- reaching here means:
+			-- - we're in a tag, inside quotes
+			-- - the character is not the closing quote mark
+			-- So just add it and get on with it.
+			if currently_opened_quotes ~= nil then
+				text_memory = text_memory .. char
+				goto continue
+			end
+		end
+
+
+
 		if char == "<" then
 			if content:sub(i, i+3) == "