feat: get element inner text (--text option)

main
Guilian 2025-01-20 17:05:04 +01:00
parent bd2b04216c
commit c923159d7a
Signed by: Guilian
GPG Key ID: B86CC9678982ED8C
2 changed files with 89 additions and 3 deletions

View File

@ -39,6 +39,62 @@ local VOID_TAGS = {
wbr = true,
}
-- Set of tag names treated as inline content when extracting text.
-- inner_text() looks each child's tag_name up here: a child whose tag is
-- NOT listed gets a "\n" appended after its text, a listed one does not.
-- Keys are tag names; every value is `true` so the table works as a set.
local INLINE_TAGS = {
-- Text formatting
a = true,
abbr = true,
b = true,
bdi = true,
bdo = true,
cite = true,
code = true,
data = true,
dfn = true,
em = true,
i = true,
kbd = true,
mark = true,
q = true,
ruby = true,
s = true,
samp = true,
small = true,
span = true,
strong = true,
sub = true,
sup = true,
time = true,
u = true,
var = true,
-- Interactive elements
button = true,
label = true,
select = true,
textarea = true,
-- Media/content
img = true,
picture = true,
map = true,
object = true,
-- Line break
br = true,
wbr = true,
-- Forms
input = true,
output = true,
progress = true,
meter = true,
-- Scripting
script = true,
noscript = true,
template = true,
}
function M.make_dom_element( tag_name, parent_elem )
local o = {
@ -86,6 +142,23 @@ function M.make_dom_element( tag_name, parent_elem )
for _, child in ipairs(self.children or {}) do
child:foreach( fn )
end
end,
-- Returns the text contained in this node and all of its descendants.
--
-- A ":text" node returns its own content (or "" when it has none).
-- Any other node concatenates the inner text of its children in order,
-- appending "\n" after each child element whose tag is not in INLINE_TAGS.
--
-- @param self  the DOM node (table with tag_name, and children or content)
-- @return string  the collected text; "" for a node without children
inner_text = function(self)
    if self.tag_name == ":text" then
        return self.content or ""
    end

    -- Collect pieces in a buffer and join once, instead of growing a string
    -- with `..` on every iteration.
    local parts = {}
    -- `or {}` mirrors foreach(): a node may have no children table at all.
    for _, child in ipairs(self.children or {}) do
        parts[#parts + 1] = child:inner_text()
        local tag = child.tag_name
        -- Text nodes are part of the surrounding inline flow, so they must
        -- never force a line break; only non-inline elements do.
        if tag ~= ":text" and not INLINE_TAGS[tag] then
            parts[#parts + 1] = "\n"
        end
    end
    return table.concat(parts)
end
}
@ -524,7 +597,7 @@ function M.clean_text_nodes(node)
return
end
node.content = trim( node.content:gsub("%s+", " ") )
node.content = node.content:gsub("%s+", " ")
end

View File

@ -33,16 +33,19 @@ end
-- Command-line flag identifiers.
-- Each value is a distinct empty table used purely as a unique key:
-- the parsed options are queried as `flags[FLAGS.X]`.
local FLAGS = {
-- Only the first matched element is printed.
FIRST_ONLY = {},
-- Mapped from "quiet"/"q"; presumably suppresses error output — the code
-- that reads it is not visible here, confirm against its use.
NO_PRINT_ERRORS = {},
-- Print each element's inner_text() instead of its HTML.
INNER_TEXT = {},
}
-- Maps long option names (e.g. "text" for `--text`) to their FLAGS entry.
-- NOTE(review): the two consecutive `["quiet"]` lines look like the removed
-- and added sides of a diff (the first lacks a trailing comma); only the
-- second one should remain in the real file — confirm before applying.
local LONGHAND_FLAGS = {
["first-only"] = FLAGS.FIRST_ONLY,
["quiet"] = FLAGS.NO_PRINT_ERRORS
["quiet"] = FLAGS.NO_PRINT_ERRORS,
["text"] = FLAGS.INNER_TEXT,
}
-- Maps single-character option names to their FLAGS entry; each entry
-- points at the same FLAGS value as its long-form counterpart in
-- LONGHAND_FLAGS ("1"/"first-only", "q"/"quiet", "t"/"text").
local SHORTHAND_FLAGS = {
["1"] = FLAGS.FIRST_ONLY,
["q"] = FLAGS.NO_PRINT_ERRORS,
["t"] = FLAGS.INNER_TEXT,
}
@ -210,6 +213,12 @@ end
if flags[FLAGS.FIRST_ONLY] then
if #elements > 0 then
if flags[FLAGS.INNER_TEXT] then
logger.print( elements[1]:inner_text() )
return 0
end
logger.print( HTML.tostring( elements[1] ) )
end
@ -217,5 +226,9 @@ if flags[FLAGS.FIRST_ONLY] then
end
-- Print every matched element, in order.
-- With the INNER_TEXT flag set the element's inner text is printed,
-- otherwise its serialized HTML.
-- NOTE(review): the unconditional HTML.tostring print directly below looks
-- like the removed side of a diff that the if/else replaces — confirm that
-- it is absent from the real file, otherwise every element prints twice.
for _, el in ipairs(elements) do
logger.print( HTML.tostring(el) )
if flags[FLAGS.INNER_TEXT] then
logger.print( el:inner_text() )
else
logger.print( HTML.tostring(el) )
end
end