feat: print select attribute of matched elements

main
Guilian 2025-01-25 17:31:09 +01:00
parent 40c4b464dc
commit 4af0f68fa9
Signed by: Guilian
GPG Key ID: B86CC9678982ED8C
3 changed files with 149 additions and 54 deletions

View File

@ -34,9 +34,7 @@ Supported combinators are all the "basic" ones:
### TODO ### TODO
- [ ] `--text` option to only get the text in the matched elements
- [ ] Universal selector (`*` to match any element) - [ ] Universal selector (`*` to match any element)
- [ ] Attribute selectors (`[attr="value"]`)
- [ ] A way to "group" selectors, e.g. `aside {p, footer}` to select all `p`s and `footer`s in `aside`s ? - [ ] A way to "group" selectors, e.g. `aside {p, footer}` to select all `p`s and `footer`s in `aside`s ?
## Usage ## Usage
@ -57,6 +55,8 @@ Where:
* `-1`, `--first-only`: Return only the first match * `-1`, `--first-only`: Return only the first match
* `-e`, `--errors`: print warnings * `-e`, `--errors`: print warnings
* `-t`, `--text`: Print only the [innerText](https://developer.mozilla.org/fr/docs/Web/API/HTMLElement/innerText) of the matched elements * `-t`, `--text`: Print only the [innerText](https://developer.mozilla.org/fr/docs/Web/API/HTMLElement/innerText) of the matched elements
* `-t`, `--text`: Print only the [innerText](https://developer.mozilla.org/fr/docs/Web/API/HTMLElement/innerText) of the matched elements
* `-a <name>`, `--select-attribute <name>`: Print the value of the attribute `<name>` for each matched element (an empty line is printed for elements that lack it). Supersedes `-t`.
## Motivation ## Motivation

View File

@ -159,7 +159,7 @@ function M.make_dom_element( tag_name, parent_elem )
end end
return text return text
end end,
} }
if parent_elem then if parent_elem then
@ -227,7 +227,7 @@ function M.tokenise( content )
end end
if content:sub(i, i+1) == "<!" then if content:sub(i, i+1) == "<!" then
i = content:find(">", i) i = content:find(">", i, true)
goto continue goto continue
end end
@ -259,7 +259,7 @@ function M.tokenise( content )
if RAW_TEXT_TAGS[word] then if RAW_TEXT_TAGS[word] then
logger.printerr("Warning: "..word.." tags may contain text that would be incorrectly parsed as HTML.") logger.printerr("Warning: "..word.." tags may contain text that would be incorrectly parsed as HTML.")
-- made possible because of the whitespace removal at the start -- made possible because of the whitespace removal at the start
i = content:find("</"..word) - 1 i = content:find("</"..word, i, true) - 1
end end
end end
@ -292,9 +292,9 @@ function M.tokenise( content )
text_memory = "" text_memory = ""
-- advance to closing ">" -- advance to closing ">"
i = content:find(">", i) i = content:find(">", i, true)
-- made possible because of the whitespace removal at the start -- made possible because of the whitespace removal at the start
i = content:find("</"..word) - 1 i = content:find("</"..word, i, true) - 1
end end
end end

165
main.lua
View File

@ -42,6 +42,7 @@ local function print_usage()
logger.print(" -1, --first-only: return only the first match") logger.print(" -1, --first-only: return only the first match")
logger.print(" -e, --errors: print warnings") logger.print(" -e, --errors: print warnings")
logger.print(" -t, --text: Print only the innerText of the matched elements") logger.print(" -t, --text: Print only the innerText of the matched elements")
logger.print(" -a, --select-attribute: Print the value of the attribute on matched elements. Supersedes -t.")
end end
@ -59,18 +60,26 @@ local FLAGS = {
FIRST_ONLY = {}, FIRST_ONLY = {},
DO_PRINT_ERRORS = {}, DO_PRINT_ERRORS = {},
INNER_TEXT = {}, INNER_TEXT = {},
SELECT_ATTRIBUTE = {}
} }
local LONGHAND_FLAGS = { local LONGHAND_FLAGS = {
["first-only"] = FLAGS.FIRST_ONLY, ["first-only"] = FLAGS.FIRST_ONLY,
["errors"] = FLAGS.DO_PRINT_ERRORS, ["errors"] = FLAGS.DO_PRINT_ERRORS,
["text"] = FLAGS.INNER_TEXT, ["text"] = FLAGS.INNER_TEXT,
["select-attribute"] = FLAGS.SELECT_ATTRIBUTE,
} }
local SHORTHAND_FLAGS = { local SHORTHAND_FLAGS = {
["1"] = FLAGS.FIRST_ONLY, ["1"] = FLAGS.FIRST_ONLY,
["e"] = FLAGS.DO_PRINT_ERRORS, ["e"] = FLAGS.DO_PRINT_ERRORS,
["t"] = FLAGS.INNER_TEXT, ["t"] = FLAGS.INNER_TEXT,
["a"] = FLAGS.SELECT_ATTRIBUTE,
}
local FLAG_NEEDS_VALUE = {
[FLAGS.SELECT_ATTRIBUTE] = true,
} }
@ -84,44 +93,103 @@ end
local flags = {} local flags = {}
local positionals = {} local positionals = {}
for _, argument in ipairs(arg) do local i = 1
if argument:match("^%-%w+$") then while i <= #arg do
for letter in argument:sub(2):gmatch("(%w)") do local argument = arg[i]
if not SHORTHAND_FLAGS[letter] then
logger.printerr("Unknown flag: -"..letter..".")
print_usage()
os.exit( RETURN_CODES.ARGUMENTS_ERROR )
end
-- Handle shorthand flags (-a, -1, etc.)
if argument:match("^%-%w+$") then
local flag_str = argument:sub(2)
-- Handle single-letter flags
if #flag_str == 1 then
local letter = flag_str
local flag = SHORTHAND_FLAGS[letter] local flag = SHORTHAND_FLAGS[letter]
if flags[flag] then if not flag then
logger.printerr("Warning: passed -" .. letter .. " flag already !") logger.printerr("Unknown flag: -"..letter)
print_usage()
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end end
-- Handle flags that require values
if FLAG_NEEDS_VALUE[flag] then
if i == #arg then
logger.printerr("Flag -"..letter.." requires a value")
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end
flags[flag] = arg[i+1]
i = i + 2 -- Skip next argument as it's the value
else
-- Handle regular boolean flags
if flags[flag] then
logger.printerr("Warning: passed -"..letter.." flag already!")
end
flags[flag] = true
i = i + 1
end
else
-- Handle grouped flags (-abc)
for letter in flag_str:gmatch("(%w)") do
local flag = SHORTHAND_FLAGS[letter]
if not flag then
logger.printerr("Unknown flag in group: -"..letter)
print_usage()
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end
if FLAG_NEEDS_VALUE[flag] then
logger.printerr("Cannot use value-taking flags in groups: -"..letter)
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end
if flags[flag] then
logger.printerr("Warning: passed -"..letter.." flag already!")
end
flags[flag] = true flags[flag] = true
end end
elseif argument:match("^%-%-[%w%-]+$") then i = i + 1
local flagname = argument:sub(3)
if not LONGHAND_FLAGS[flagname] then
logger.printerr("Unknown flag: --"..flagname..".")
print_usage()
os.exit( RETURN_CODES.ARGUMENTS_ERROR )
end end
-- Handle long flags (--flag)
elseif argument:match("^%-%-") then
local flagname = argument:sub(3)
local flag = LONGHAND_FLAGS[flagname] local flag = LONGHAND_FLAGS[flagname]
if flags[flag] then if not flag then
logger.printerr("Warning: passed --" .. flagname .. " flag already !") logger.printerr("Unknown flag: --"..flagname)
print_usage()
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end end
flags[flag] = true -- Handle flags that require values
if FLAG_NEEDS_VALUE[flag] then
if i == #arg then
logger.printerr("Flag --"..flagname.." requires a value")
os.exit(RETURN_CODES.ARGUMENTS_ERROR)
end
flags[flag] = arg[i+1]
i = i + 2 -- Skip next argument as it's the value
else else
table.insert( positionals, argument ) -- Handle regular boolean flags
if flags[flag] then
logger.printerr("Warning: passed --"..flagname.." flag already!")
end
flags[flag] = true
i = i + 1
end
else
-- Handle positional arguments
table.insert(positionals, argument)
i = i + 1
end end
end end
if flags[ FLAGS.DO_PRINT_ERRORS ] then if flags[ FLAGS.DO_PRINT_ERRORS ] then
logger.enable_printing_errors() logger.enable_printing_errors()
end end
@ -238,24 +306,51 @@ if #elements == 0 then
os.exit( RETURN_CODES.NOTHING_FOUND ) os.exit( RETURN_CODES.NOTHING_FOUND )
end end
local MAX_NUMBER_OF_ELEMENTS_TO_SHOW = #elements
if flags[FLAGS.FIRST_ONLY] then if flags[FLAGS.FIRST_ONLY] then
if #elements > 0 then MAX_NUMBER_OF_ELEMENTS_TO_SHOW = 1
if flags[FLAGS.INNER_TEXT] then
logger.print( elements[1]:inner_text() )
os.exit( RETURN_CODES.OK )
end
logger.print( HTML.tostring( elements[1] ) )
end
os.exit( RETURN_CODES.OK )
end end
for _, el in ipairs(elements) do
if flags[FLAGS.INNER_TEXT] then
logger.print( el:inner_text() )
local attr = flags[FLAGS.SELECT_ATTRIBUTE]
if attr then
local spoof_nil = {}
local attrs = {}
local i = 1
while i <= MAX_NUMBER_OF_ELEMENTS_TO_SHOW do
local el = elements[i]
local attribute_value = el.attributes[attr]
table.insert( attrs, attribute_value or spoof_nil )
i = i+1
end
local nb_non_nil_values = 0
for _, val in ipairs(attrs) do
if val ~= spoof_nil then
nb_non_nil_values = nb_non_nil_values + 1
end
end
if nb_non_nil_values == 0 then
os.exit( RETURN_CODES.NOTHING_FOUND )
end
for _, val in ipairs(attrs) do
if val ~= spoof_nil then
print(val)
else else
logger.print( HTML.tostring(el) ) print()
end end
end
os.exit( RETURN_CODES.OK )
end
end end