Compare commits
No commits in common. "e9c2553f88fd66e495de0e8a0511603be5d36867" and "111da7d6636451163c8d01e4caf1082673e52008" have entirely different histories.
e9c2553f88
...
111da7d663
176
css.lua
176
css.lua
|
|
@ -1,176 +0,0 @@
|
|||
local M = {}
|
||||
|
||||
--- Strip leading and trailing whitespace from a string.
-- @param str string to trim
-- @return the text between the first and last non-whitespace characters
--         (an empty string when `str` is empty or all whitespace)
local function trim(str)
    local stripped = str:match("^%s*(.-)%s*$")
    return stripped
end
|
||||
|
||||
|
||||
-- Combinator kinds. Each kind is a distinct empty table used purely as a
-- unique sentinel: callers compare by reference (e.g.
-- `x == COMBINATORS.DESCENDANT`), so the tables carry no data.
local COMBINATORS = {}
for _, kind in ipairs({
    "DESCENDANT",
    "DIRECT_DESCENDANT",
    "NEXT_SIBLING",
    "SUBSEQUENT_SIBLING",
}) do
    COMBINATORS[kind] = {}
end
M.COMBINATORS = COMBINATORS

-- Maps each explicit combinator character to its sentinel. The descendant
-- combinator has no character of its own, so it is absent here.
local COMBINATOR_CHARS = {}
COMBINATOR_CHARS[">"] = COMBINATORS.DIRECT_DESCENDANT
COMBINATOR_CHARS["+"] = COMBINATORS.NEXT_SIBLING
COMBINATOR_CHARS["~"] = COMBINATORS.SUBSEQUENT_SIBLING
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
--- Build a character-level tokeniser over `input`.
--
-- The returned table exposes four closures sharing one cursor:
--   peek()            -> the character under the cursor, or nil at end of input
--   next()            -> like peek(), but also advances the cursor by one
--   read_identifier() -> consumes the longest run of `[%w-]` characters at the
--                        cursor and returns it ("" when there is none)
--   pos()             -> the current 1-based cursor position
--
-- @param input string to tokenise
-- @return table with the `peek`, `next`, `read_identifier` and `pos` functions
local function create_tokeniser(input)
    local pos = 1
    local len = #input

    local function peek()
        if pos > len then return nil end
        return input:sub(pos, pos)
    end

    -- Named `advance` locally so the builtin `next` is not shadowed inside
    -- this function; it is still exported under the key `next`.
    local function advance()
        local char = peek()
        if char then pos = pos + 1 end
        return char
    end

    local function read_identifier()
        -- A single anchored match starting at the cursor replaces the
        -- per-character concatenation loop. `*` allows an empty match, so
        -- this always yields a string while pos <= len + 1.
        local identifier = input:match("^[%w-]*", pos)
        pos = pos + #identifier
        return identifier
    end

    return {
        peek = peek,
        next = advance,
        read_identifier = read_identifier,
        pos = function() return pos end
    }
end
|
||||
|
||||
|
||||
--- Parse one compound selector (e.g. `div.card#main`) at the cursor.
--
-- Reads an optional leading `*` or tag name, then any number of `.class`
-- and `#id` parts. Parsing stops at the first character that starts
-- neither; that character is left unconsumed.
--
-- @param tokeniser table providing peek/next/read_identifier/pos
-- @return table { tag_name, id, class = {...}, attributes = {} }; `tag_name`
--         and `id` stay nil when absent, and a later `#id` overwrites an
--         earlier one. `attributes` is always returned empty here.
-- Raises an error when `.` or `#` is not followed by an identifier.
local function parse_compound_selector( tokeniser )
    local compound = {
        tag_name = nil,
        id = nil,
        class = {},
        attributes = {},
    }

    -- Optional leading universal or type selector
    local lead = tokeniser.peek()
    if lead == "*" then
        tokeniser.next()
        compound.tag_name = "*"
    elseif lead and lead:match("[%w-]") then
        local tag = tokeniser.read_identifier()
        if tag ~= "" then
            compound.tag_name = tag
        end
    end

    -- Any number of trailing class / id parts
    local marker = tokeniser.peek()
    while marker == "." or marker == "#" do
        tokeniser.next() -- consume the '.' or '#'
        local ident = tokeniser.read_identifier()

        if marker == "." then
            if ident == "" then
                error("Expected class name at position " .. tokeniser.pos())
            end
            compound.class[#compound.class + 1] = ident
        else
            if ident == "" then
                error("Expected id at position " .. tokeniser.pos())
            end
            compound.id = ident
        end

        marker = tokeniser.peek()
    end

    return compound
end
|
||||
|
||||
|
||||
--- Parse the combinator that separates two compound selectors.
--
-- Leading whitespace is consumed first. If the next character is `>`, `+`
-- or `~`, it is consumed along with any whitespace after it, and the
-- matching COMBINATORS sentinel is returned. Any other remaining character
-- yields COMBINATORS.DESCENDANT without being consumed.
--
-- @param tokeniser table providing peek/next
-- @return a COMBINATORS sentinel, or nil when the input is exhausted
local function parse_combinator( tokeniser )
    -- Advance past whitespace; returns the first non-whitespace character
    -- (nil at end of input) without consuming it.
    local function skip_whitespace()
        local c = tokeniser.peek()
        while c and c:match("%s") do
            tokeniser.next()
            c = tokeniser.peek()
        end
        return c
    end

    local symbol = skip_whitespace()
    if symbol == nil then
        return nil
    end

    local explicit = COMBINATOR_CHARS[symbol]
    if explicit == nil then
        -- Not an explicit combinator character: descendant combinator
        return COMBINATORS.DESCENDANT
    end

    tokeniser.next() -- consume the combinator character
    skip_whitespace()
    return explicit
end
|
||||
|
||||
|
||||
|
||||
|
||||
--- Parse a CSS selector string into a linked chain of compound selectors.
--
-- Each link has the shape
--   { selector = <compound selector>, combinator = <COMBINATORS.*>, next = <link> }
-- where `combinator` and `next` are absent (nil) on the final link.
--
-- @param input selector text; surrounding whitespace is ignored
-- @return the first link of the chain
-- Raises an error on malformed class/id parts, and on any character the
-- parser cannot consume (for example `[`, `:` or `_`).
function M.parse( input )
    input = trim( input )

    local tokeniser = create_tokeniser( input )

    local output = { selector = parse_compound_selector( tokeniser ) }
    local current = output

    -- Parse alternating combinators and compound selectors
    while true do
        local start_pos = tokeniser.pos()

        local combinator = parse_combinator( tokeniser )
        if not combinator then
            break
        end

        local next_selector = parse_compound_selector( tokeniser )

        -- If neither step consumed any input, the cursor is stuck on an
        -- unsupported character. The next iteration would see the same
        -- state, so without this check the loop never ends and the chain
        -- grows without bound.
        if tokeniser.pos() == start_pos then
            error("Unexpected character '" .. tostring(tokeniser.peek())
                .. "' at position " .. start_pos)
        end

        current.combinator = combinator
        current.next = { selector = next_selector }
        current = current.next
    end

    return output
end
|
||||
|
||||
|
||||
return M
|
||||
|
||||
212
html.lua
212
html.lua
|
|
@ -3,13 +3,6 @@ local function trim(str)
|
|||
return str:match("^%s*(.-)%s*$")
|
||||
end
|
||||
|
||||
--- Make a one-level copy of a table.
-- Keys and values are copied by reference; nested tables are shared with
-- the original, and the metatable is not copied.
-- @param t table to copy
-- @return a new table holding the same key/value pairs as `t`
local function shallow_copy(t)
    local duplicate = {}
    for key, value in pairs(t) do
        duplicate[key] = value
    end
    return duplicate
end
|
||||
|
||||
|
||||
local M = {}
|
||||
|
|
@ -42,50 +35,10 @@ local VOID_TAGS = {
|
|||
function M.make_dom_element( tag_name, parent_elem )
|
||||
local o = {
|
||||
tag_name = tag_name,
|
||||
attributes = {},
|
||||
content = "",
|
||||
|
||||
children = {},
|
||||
parent = parent_elem,
|
||||
|
||||
get_child_index = function( self )
|
||||
if not self.parent then
|
||||
return -1
|
||||
end
|
||||
|
||||
for i, child in ipairs(self.parent.children) do
|
||||
if child == self then return i end
|
||||
end
|
||||
end,
|
||||
|
||||
get_next_sibling = function( self )
|
||||
if not self.parent then return nil end
|
||||
|
||||
local found_self = false
|
||||
for _, child in ipairs(self.parent.children) do
|
||||
if found_self then
|
||||
return child
|
||||
end
|
||||
|
||||
if child == self then
|
||||
found_self = true
|
||||
end
|
||||
end
|
||||
|
||||
return nil
|
||||
end,
|
||||
|
||||
check_simple_selector = function( self, selector )
|
||||
return M.check_simple_selector( self, selector )
|
||||
end,
|
||||
|
||||
foreach = function( self, fn )
|
||||
fn( self )
|
||||
|
||||
for _, child in ipairs(self.children or {}) do
|
||||
child:foreach( fn )
|
||||
end
|
||||
end
|
||||
children = {},
|
||||
attributes = {},
|
||||
content = ""
|
||||
}
|
||||
|
||||
if parent_elem then
|
||||
|
|
@ -307,64 +260,6 @@ function M.tokenise( content )
|
|||
end
|
||||
|
||||
|
||||
--- Test whether a DOM node satisfies one compound selector.
--
-- @param element  DOM node; reads `tag_name`, `attributes.id` and
--                 `attributes.class` (iterated as a list of class names)
-- @param selector compound selector; reads `tag_name`, `id` and `class`
--                 (a list of required class names)
-- @return true when every constraint present in `selector` holds, false
--         otherwise. `:text` nodes never match.
--
-- A `tag_name` of "*" is the universal selector: it matches any node that
-- has a tag name. A node whose `tag_name` is nil is therefore not matched
-- by "*". The document root created by parse_tokens_into_document is such
-- a node.
function M.check_simple_selector(element, selector)
    -- Skip text nodes
    if element.tag_name == ":text" then
        return false
    end

    -- Check tag name if specified
    local wanted_tag = selector.tag_name
    if wanted_tag == "*" then
        -- Comparing "*" literally against tag names would reject every
        -- element, so the universal selector is handled separately.
        if element.tag_name == nil then
            return false
        end
    elseif wanted_tag and element.tag_name ~= wanted_tag then
        return false
    end

    -- Check ID if specified
    if selector.id and element.attributes.id ~= selector.id then
        return false
    end

    -- Check classes if specified: every requested class must be present
    if selector.class and #selector.class > 0 then
        local element_classes = element.attributes.class
        if not element_classes then
            return false
        end

        local present = {}
        for _, elem_class in ipairs(element_classes) do
            present[elem_class] = true
        end

        for _, class in ipairs(selector.class) do
            if not present[class] then
                return false
            end
        end
    end

    return true
end
|
||||
|
||||
--- Collect every node in a subtree that satisfies a compound selector.
--
-- The subtree is walked depth-first, parent before children, so results
-- appear in document order. `document` itself is tested too.
--
-- @param document root node of the subtree to search
-- @param selector compound selector passed to M.check_simple_selector
-- @return list of matching nodes (empty when nothing matches)
function M.query_simple_selector(document, selector)
    local found = {}

    local function visit(current)
        if M.check_simple_selector(current, selector) then
            found[#found + 1] = current
        end

        for _, descendant in ipairs(current.children) do
            visit(descendant)
        end
    end

    visit(document)

    return found
end
|
||||
|
||||
|
||||
function M.parse_tokens_into_document( TOKENS )
|
||||
local DOCUMENT = M.make_dom_element(nil, nil)
|
||||
local current_doc_element = DOCUMENT
|
||||
|
|
@ -375,7 +270,7 @@ function M.parse_tokens_into_document( TOKENS )
|
|||
local token = TOKENS[i]
|
||||
|
||||
if token.type == "WORD" then
|
||||
if current_doc_element.tag_name == ":text" then
|
||||
if current_doc_element.tag_name == "#text" then
|
||||
current_doc_element = current_doc_element.parent
|
||||
end
|
||||
|
||||
|
|
@ -427,21 +322,18 @@ function M.parse_tokens_into_document( TOKENS )
|
|||
end
|
||||
|
||||
|
||||
|
||||
local value = nil
|
||||
if raw_value == "" or raw_value == nil then
|
||||
value = nil
|
||||
--elseif raw_value:find("%S+%s+%S+") then
|
||||
-- value = {}
|
||||
-- print(raw_value)
|
||||
-- for word in raw_value:gmatch("%S+") do
|
||||
-- table.insert( value, word )
|
||||
-- end
|
||||
else
|
||||
value = trim(raw_value)
|
||||
|
||||
if name == "class" then
|
||||
local classes = {}
|
||||
|
||||
for class in value:gmatch("%S+") do
|
||||
table.insert( classes, class )
|
||||
end
|
||||
|
||||
value = classes
|
||||
end
|
||||
end
|
||||
|
||||
current_doc_element.attributes[name] = value
|
||||
|
|
@ -472,7 +364,7 @@ function M.parse_tokens_into_document( TOKENS )
|
|||
|
||||
|
||||
if token.type == "TEXT" then
|
||||
local new_elem = M.make_dom_element( ":text", current_doc_element )
|
||||
local new_elem = M.make_dom_element( "#text", current_doc_element )
|
||||
new_elem.content = token.value
|
||||
current_doc_element = new_elem
|
||||
|
||||
|
|
@ -484,20 +376,18 @@ function M.parse_tokens_into_document( TOKENS )
|
|||
i = i+1
|
||||
end
|
||||
|
||||
M.clean_text_nodes( DOCUMENT )
|
||||
|
||||
return DOCUMENT
|
||||
end
|
||||
|
||||
|
||||
function M.clean_text_nodes(node)
|
||||
if node.tag_name ~= ":text" then
|
||||
if node.tag_name ~= "#text" then
|
||||
-- Don't clean anything in raw text tags
|
||||
if RAW_TEXT_TAGS[node.tag_name] then
|
||||
return
|
||||
end
|
||||
|
||||
for _, child in ipairs( shallow_copy(node.children) ) do
|
||||
for _, child in ipairs(node.children) do
|
||||
M.clean_text_nodes( child )
|
||||
end
|
||||
return
|
||||
|
|
@ -509,7 +399,7 @@ function M.clean_text_nodes(node)
|
|||
error("Text node without a parent; should be impossible !")
|
||||
end
|
||||
|
||||
for i, child in ipairs( shallow_copy(node.parent.children) ) do
|
||||
for i, child in ipairs(node.parent.children) do
|
||||
if child == node then
|
||||
table.remove( node.parent.children, i )
|
||||
break
|
||||
|
|
@ -523,91 +413,47 @@ function M.clean_text_nodes(node)
|
|||
end
|
||||
|
||||
|
||||
function M.tostring(node, indent, include_internal_pseudoelements)
|
||||
function M.print_document(node, indent)
|
||||
-- Default indentation is 0 (root level)
|
||||
indent = indent or 0
|
||||
include_internal_pseudoelements = include_internal_pseudoelements or false
|
||||
|
||||
local is_pseudo_element = (node.tag_name or ":root"):sub(1,1) == ":"
|
||||
|
||||
|
||||
local indent_level_str = " "
|
||||
-- Create the indentation string (e.g., " " for each level)
|
||||
local indent_str = string.rep(indent_level_str, indent)
|
||||
|
||||
if node.tag_name == ":text" then
|
||||
local str = ""
|
||||
|
||||
if include_internal_pseudoelements then
|
||||
str = str .. "<:text>"
|
||||
end
|
||||
|
||||
str = str .. node.content
|
||||
|
||||
if include_internal_pseudoelements then
|
||||
str = str .. "</:text>"
|
||||
end
|
||||
|
||||
return str
|
||||
if node.tag_name == "#text" then
|
||||
print(indent_str .. "<#text>\n" .. node.content .. "\n" .. indent_str .. "</#text>")
|
||||
return
|
||||
end
|
||||
|
||||
local node_name = ""
|
||||
|
||||
if not is_pseudo_element or include_internal_pseudoelements then
|
||||
-- Print the current node's tag name
|
||||
node_name = node_name .. "\n" .. indent_str .. "<" .. (node.tag_name or ":root")
|
||||
end
|
||||
node_name = node_name .. indent_str .. "<" .. (node.tag_name or "#root")
|
||||
|
||||
-- Print attributes if any
|
||||
if next(node.attributes) ~= nil then
|
||||
for attr, value in pairs(node.attributes) do
|
||||
if type(value) == "table" then
|
||||
node_name = node_name .. " " .. attr .. "=\""
|
||||
for i, val in ipairs( value ) do
|
||||
if i > 1 then node_name = node_name .. " " end
|
||||
node_name = node_name .. tostring(val)
|
||||
end
|
||||
node_name = node_name .. "\""
|
||||
else
|
||||
node_name = node_name .. " " .. attr .. "=\"" .. tostring(value) .. "\""
|
||||
end
|
||||
--print(indent_str .. " " .. attr .. " = " .. tostring(value))
|
||||
node_name = node_name .. " " .. attr .. "=\"" .. tostring(value) .. "\""
|
||||
end
|
||||
end
|
||||
|
||||
if not is_pseudo_element or include_internal_pseudoelements then
|
||||
node_name = node_name .. ">"
|
||||
end
|
||||
node_name = node_name .. ">"
|
||||
|
||||
--print( node_name )
|
||||
print( node_name )
|
||||
|
||||
local next_indent = indent + 1
|
||||
if is_pseudo_element and not include_internal_pseudoelements then
|
||||
next_indent = indent
|
||||
end
|
||||
|
||||
-- Recursively print children
|
||||
for _, child in ipairs(node.children) do
|
||||
node_name = node_name .. M.tostring(child, next_indent, include_internal_pseudoelements)
|
||||
M.print_document(child, indent + 1)
|
||||
end
|
||||
|
||||
if not VOID_TAGS[node.tag_name] and ( not is_pseudo_element or include_internal_pseudoelements ) then
|
||||
-- Print the closing tag
|
||||
local end_indent = ""
|
||||
local closing_text_tag = "</:text>"
|
||||
if node_name:sub(#node_name, #node_name) == ">" and node_name:sub(#node_name - #closing_text_tag + 1, #node_name) ~= closing_text_tag then
|
||||
end_indent = "\n" .. indent_str
|
||||
end
|
||||
node_name = node_name .. end_indent .. "</" .. (node.tag_name or ":root") .. ">"
|
||||
end
|
||||
|
||||
return node_name
|
||||
-- Print the closing tag
|
||||
print(indent_str .. "</" .. (node.tag_name or "#root") .. ">")
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
function M.parse( html_string )
|
||||
local clean_html = M.preprocess( html_string )
|
||||
|
||||
|
|
@ -615,7 +461,9 @@ function M.parse( html_string )
|
|||
|
||||
local document = M.parse_tokens_into_document( tokens )
|
||||
|
||||
return document
|
||||
local cleaned_doc = M.clean_text_nodes( document )
|
||||
|
||||
return cleaned_doc
|
||||
end
|
||||
|
||||
return M
|
||||
|
|
|
|||
95
main.lua
95
main.lua
|
|
@ -1,10 +1,9 @@
|
|||
#!/bin/env lua
|
||||
|
||||
local html = require(".html")
|
||||
local css = require(".css")
|
||||
|
||||
|
||||
local file = io.open("small.html", "r")
|
||||
local file = io.open("test.html", "r")
|
||||
|
||||
if file == nil then
|
||||
error("File doesn't exist")
|
||||
|
|
@ -12,94 +11,4 @@ end
|
|||
|
||||
local content = file:read("a")
|
||||
|
||||
local doc = html.parse( content )
|
||||
|
||||
|
||||
-- Read one selector from standard input, evaluate it against `doc`, and
-- print every matching element.
print("Write a css selector:")

local selector_text = io.read()
if selector_text == nil then
    -- io.read() returns nil at end of input; fail with a clear message
    -- instead of crashing inside the selector parser.
    error("No selector provided on standard input")
end

local whole_selector = css.parse( selector_text )
local current_selector = whole_selector

local elements = {}

-- Start with all elements matching the first compound selector
doc:foreach(function( el )
    if el:check_simple_selector( current_selector.selector ) then
        table.insert( elements, el )
    end
end)

-- Walk the selector chain. At each step `elements` holds the nodes matched
-- so far, and is replaced by the nodes reachable from them through the
-- current combinator that also match the next compound selector.
while current_selector.combinator ~= nil do
    local next_selector = current_selector.next
    local combinator = current_selector.combinator

    local new_elements = {}
    -- Several context elements can reach the same node (nested ancestors,
    -- shared siblings); `seen` keeps each result only once.
    local seen = {}

    local function add_if_match( candidate )
        if not seen[candidate] and candidate:check_simple_selector( next_selector.selector ) then
            seen[candidate] = true
            new_elements[#new_elements + 1] = candidate
        end
    end

    for _, element in ipairs( elements ) do
        if combinator == css.COMBINATORS.DESCENDANT then
            element:foreach(function( el )
                -- foreach visits the element itself first; an element is
                -- not its own descendant, so skip it.
                if el ~= element then
                    add_if_match( el )
                end
            end)
        elseif combinator == css.COMBINATORS.DIRECT_DESCENDANT then
            for _, child in ipairs( element.children ) do
                add_if_match( child )
            end
        elseif combinator == css.COMBINATORS.NEXT_SIBLING then
            -- Text nodes between two elements do not count as siblings here
            local next_sibling = element:get_next_sibling()
            while next_sibling and next_sibling.tag_name == ":text" do
                next_sibling = next_sibling:get_next_sibling()
            end

            if next_sibling then
                add_if_match( next_sibling )
            end
        elseif combinator == css.COMBINATORS.SUBSEQUENT_SIBLING then
            local sibling = element:get_next_sibling()
            while sibling ~= nil do
                add_if_match( sibling )
                sibling = sibling:get_next_sibling()
            end
        end
    end

    elements = new_elements
    current_selector = next_selector
end

for _, el in ipairs(elements) do
    print( html.tostring( el ) )
end
|
||||
|
||||
|
||||
|
||||
html.print_document( html.parse( content ) )
|
||||
|
|
|
|||
60
small.html
60
small.html
|
|
@ -1,60 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>HTMLQ Test Document</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="main" class="container wrapper">
|
||||
<header class="header nav-bar">
|
||||
<h1>HTMLQ Test Document</h1>
|
||||
<nav class="nav" data-nav-type="main">
|
||||
<ul>
|
||||
<li><a href="#" data-link-type="internal">Home</a></li>
|
||||
<li><a href="#" data-link-type="external">About</a></li>
|
||||
<li><a href="#" data-link-type="internal">Contact</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</header>
|
||||
<main class="content main-content">
|
||||
<section class="section featured" id="featured-section">
|
||||
<h2>Featured Section</h2>
|
||||
<p>This is the featured section.</p>
|
||||
<div class="card featured-card" data-card-type="featured">
|
||||
<h3>Featured Card</h3>
|
||||
<p>This is the featured card.</p>
|
||||
<ul>
|
||||
<li>Item 1</li>
|
||||
<li>Item 2</li>
|
||||
<li>Item 3</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
<section class="section regular" id="regular-section">
|
||||
<h2>Regular Section</h2>
|
||||
<p>This is the regular section.</p>
|
||||
<div class="card regular-card" data-card-type="regular">
|
||||
<h3>Regular Card</h3>
|
||||
<p>This is the regular card.</p>
|
||||
<ol>
|
||||
<li>Item 1</li>
|
||||
<li>Item 2</li>
|
||||
<li>Item 3</li>
|
||||
</ol>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
<footer class="footer nav-bar" data-footer-type="main">
|
||||
<p>© 2024 HTMLQ Test Document</p>
|
||||
<nav class="nav" data-nav-type="footer">
|
||||
<ul>
|
||||
<li><a href="#" data-link-type="internal">Home</a></li>
|
||||
<li><a href="#" data-link-type="external">About</a></li>
|
||||
<li><a href="#" data-link-type="internal">Contact</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</footer>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in New Issue