diff --git a/html.lua b/html.lua
index da43d7f..5915bf6 100644
--- a/html.lua
+++ b/html.lua
@@ -39,6 +39,64 @@ local VOID_TAGS = {
 	wbr = true,
 }
 
+-- Tags treated as inline by inner_text(): a child element whose tag is
+-- listed here does NOT get a newline appended after its text.
+local INLINE_TAGS = {
+	-- Text formatting
+	a = true,
+	abbr = true,
+	b = true,
+	bdi = true,
+	bdo = true,
+	cite = true,
+	code = true,
+	data = true,
+	dfn = true,
+	em = true,
+	i = true,
+	kbd = true,
+	mark = true,
+	q = true,
+	ruby = true,
+	s = true,
+	samp = true,
+	small = true,
+	span = true,
+	strong = true,
+	sub = true,
+	sup = true,
+	time = true,
+	u = true,
+	var = true,
+
+	-- Interactive elements
+	button = true,
+	label = true,
+	select = true,
+	textarea = true,
+
+	-- Media/content
+	img = true,
+	picture = true,
+	map = true,
+	object = true,
+
+	-- Line break
+	br = true,
+	wbr = true,
+
+	-- Forms
+	input = true,
+	output = true,
+	progress = true,
+	meter = true,
+
+	-- Scripting
+	script = true,
+	noscript = true,
+	template = true,
+}
+
 
 function M.make_dom_element( tag_name, parent_elem )
 	local o = {
@@ -86,6 +144,29 @@ function M.make_dom_element( tag_name, parent_elem )
 			for _, child in ipairs(self.children or {}) do
 				child:foreach( fn )
 			end
+		end,
+
+		-- Returns the text of this node and all of its descendants,
+		-- concatenated in child order. A newline follows every child
+		-- element that is not listed in INLINE_TAGS; text nodes never add one.
+		inner_text = function(self)
+			if self.tag_name == ":text" then
+				return self.content or ""
+			end
+
+			-- Collect the pieces and join once instead of re-concatenating per child.
+			local parts = {}
+			for _, child in ipairs(self.children or {}) do
+				parts[#parts + 1] = child:inner_text()
+
+				-- ":text" is not an HTML tag, so it is absent from INLINE_TAGS;
+				-- exclude it explicitly or every text run would end in "\n".
+				if child.tag_name ~= ":text" and not INLINE_TAGS[child.tag_name] then
+					parts[#parts + 1] = "\n"
+				end
+			end
+
+			return table.concat(parts)
 		end
 	}
 
@@ -524,7 +605,7 @@ function M.clean_text_nodes(node)
 		return
 	end
 
-	node.content = trim( node.content:gsub("%s+", " ") )
+	node.content = node.content:gsub("%s+", " ")
 end
 
 
diff --git a/main.lua b/main.lua
index aa9e5fb..cfde2c2 100644
--- a/main.lua
+++ b/main.lua
@@ -33,16 +33,19 @@ end
 local FLAGS = {
 	FIRST_ONLY = {},
 	NO_PRINT_ERRORS = {},
+	INNER_TEXT = {},
 }
 
 local LONGHAND_FLAGS = {
 	["first-only"] = FLAGS.FIRST_ONLY,
-	["quiet"] = FLAGS.NO_PRINT_ERRORS
+	["quiet"] = FLAGS.NO_PRINT_ERRORS,
+	["text"] = FLAGS.INNER_TEXT,
 }
 
 local SHORTHAND_FLAGS = {
 	["1"] = FLAGS.FIRST_ONLY,
 	["q"] = FLAGS.NO_PRINT_ERRORS,
+	["t"] = FLAGS.INNER_TEXT,
 }
 
 
@@ -210,6 +213,12 @@ end
 
 if flags[FLAGS.FIRST_ONLY] then
 	if #elements > 0 then
+
+		if flags[FLAGS.INNER_TEXT] then
+			logger.print( elements[1]:inner_text() )
+			return 0
+		end
+
 		logger.print( HTML.tostring( elements[1] ) )
 	end
 
@@ -217,5 +226,9 @@ if flags[FLAGS.FIRST_ONLY] then
 end
 
 for _, el in ipairs(elements) do
-	logger.print( HTML.tostring(el) )
+	if flags[FLAGS.INNER_TEXT] then
+		logger.print( el:inner_text() )
+	else
+		logger.print( HTML.tostring(el) )
+	end
 end