Compare commits

...

9 Commits

4 changed files with 511 additions and 32 deletions

176
css.lua Normal file
View File

@ -0,0 +1,176 @@
local M = {}
--- Strip leading and trailing whitespace from a string.
-- @tparam string str text to trim
-- @treturn string `str` without its surrounding whitespace
local function trim(str)
    -- The lazy capture keeps interior whitespace and lets the trailing
    -- `%s*` absorb everything up to the end of the string.
    local stripped = str:match("^%s*(.-)%s*$")
    return stripped
end
-- Combinator kinds. Every value is a distinct empty table that serves only
-- as a unique identity, so kinds must be compared with `==`.
local COMBINATORS = {}
COMBINATORS.DESCENDANT = {}
COMBINATORS.DIRECT_DESCENDANT = {}
COMBINATORS.NEXT_SIBLING = {}
COMBINATORS.SUBSEQUENT_SIBLING = {}
M.COMBINATORS = COMBINATORS

-- Explicit combinator characters and the kind each one denotes.
-- DESCENDANT has no entry here because it is not written with a character.
local COMBINATOR_CHARS = {}
COMBINATOR_CHARS[">"] = COMBINATORS.DIRECT_DESCENDANT
COMBINATOR_CHARS["+"] = COMBINATORS.NEXT_SIBLING
COMBINATOR_CHARS["~"] = COMBINATORS.SUBSEQUENT_SIBLING
--- Create a character-level tokeniser over `input`.
--
-- The returned table exposes closures sharing one cursor:
--   * `peek()`            - character at the cursor, or nil at end of input
--   * `next()`            - like `peek()`, but also advances past the character
--   * `read_identifier()` - consumes the longest run of identifier characters
--                           (letters, digits, `-` and `_`); may return ""
--   * `pos()`             - current 1-based cursor position
--
-- @tparam string input text to scan
-- @treturn table tokeniser
local function create_tokeniser(input)
    local pos = 1
    local len = #input

    local function peek()
        if pos > len then return nil end
        return input:sub(pos, pos)
    end

    -- Called `advance` internally so the builtin `next` is not shadowed in
    -- this scope; it is still exported under the key `next`.
    local function advance()
        local char = peek()
        if char then pos = pos + 1 end
        return char
    end

    local function read_identifier()
        -- `^` anchors the match at `pos`, so this grabs the whole run in one
        -- step instead of concatenating one character at a time.
        -- `_` is accepted because it is legal in CSS identifiers.
        local identifier = input:match("^[%w_-]*", pos)
        pos = pos + #identifier
        return identifier
    end

    return {
        peek = peek,
        next = advance,
        read_identifier = read_identifier,
        pos = function() return pos end,
    }
end
--- Parse one compound selector (e.g. `div.card#main`) from the tokeniser.
--
-- Reads an optional leading `*` or tag name, then any number of `.class`
-- and `#id` parts. Stops, without consuming it, at the first character that
-- starts none of those.
--
-- @tparam table tokeniser object exposing `peek`, `next`, `read_identifier`
--   and `pos`
-- @treturn table selector with fields `tag_name` (string or nil), `id`
--   (string or nil), `class` (list of names) and `attributes` (left empty)
-- @raise if `.` or `#` is not followed by an identifier
local function parse_compound_selector( tokeniser )
    local selector = { tag_name = nil, id = nil, class = {}, attributes = {} }

    -- Optional type part: universal `*` or a tag name.
    local first = tokeniser.peek()
    if first == "*" then
        tokeniser.next()
        selector.tag_name = "*"
    elseif first and first:match("[%w-]") then
        local tag = tokeniser.read_identifier()
        if tag ~= "" then
            selector.tag_name = tag
        end
    end

    -- Any number of `.class` / `#id` parts; a later `#id` replaces an earlier one.
    local marker = tokeniser.peek()
    while marker == "." or marker == "#" do
        tokeniser.next() -- consume the marker
        local name = tokeniser.read_identifier()
        if marker == "." then
            if name == "" then
                error("Expected class name at position " .. tokeniser.pos())
            end
            selector.class[#selector.class + 1] = name
        else
            if name == "" then
                error("Expected id at position " .. tokeniser.pos())
            end
            selector.id = name
        end
        marker = tokeniser.peek()
    end

    return selector
end
--- Read the combinator that separates two compound selectors.
--
-- Skips whitespace, then:
--   * returns nil at end of input;
--   * for `>`, `+` or `~`, consumes the character and any whitespace after
--     it, and returns the matching COMBINATORS value;
--   * for any other character, returns COMBINATORS.DESCENDANT without
--     consuming it (whether or not whitespace was actually skipped).
--
-- @tparam table tokeniser object exposing `peek` and `next`
-- @treturn table|nil a COMBINATORS value, or nil when the input is exhausted
local function parse_combinator( tokeniser )
    -- Advance past whitespace and report the first character after it.
    local function skip_whitespace()
        local c = tokeniser.peek()
        while c and c:match("%s") do
            tokeniser.next()
            c = tokeniser.peek()
        end
        return c
    end

    local char = skip_whitespace()
    if not char then
        return nil
    end

    local explicit = COMBINATOR_CHARS[char]
    if explicit == nil then
        return COMBINATORS.DESCENDANT
    end

    tokeniser.next() -- consume the combinator character
    skip_whitespace()
    return explicit
end
--- Parse a CSS selector string into a linked chain of compound selectors.
--
-- Each link has the shape `{ selector = <compound>, combinator = <kind>,
-- next = <link> }`; the last link has neither `combinator` nor `next`.
-- `combinator` is one of the values in `M.COMBINATORS`.
--
-- @tparam string input selector text; surrounding whitespace is ignored
-- @treturn table first link of the chain
-- @raise if a combinator is not followed by a compound selector (dangling
--   combinator or unsupported character), or if a `.`/`#` lacks a name
function M.parse( input )
    input = trim( input )
    local tokeniser = create_tokeniser( input )

    local output = { selector = parse_compound_selector( tokeniser ) }
    local current = output

    -- Parse alternating combinators and compound selectors.
    while true do
        local combinator = parse_combinator( tokeniser )
        if not combinator then
            break
        end

        local start_pos = tokeniser.pos()
        local next_selector = parse_compound_selector( tokeniser )

        -- A compound selector that consumed nothing means the input has a
        -- dangling combinator or a character this parser does not support.
        -- Without this check the loop would never advance and would keep
        -- appending empty links forever.
        if tokeniser.pos() == start_pos then
            local found = tokeniser.peek()
            local description = "end of input"
            if found then
                description = "'" .. found .. "'"
            end
            error("Expected selector at position " .. start_pos .. ", found " .. description)
        end

        current.combinator = combinator
        current.next = { selector = next_selector }
        current = current.next
    end

    return output
end
return M

206
html.lua
View File

@ -3,6 +3,13 @@ local function trim(str)
return str:match("^%s*(.-)%s*$")
end
--- Return a new table holding the same key/value pairs as `t`.
-- Values are not copied recursively: nested tables are shared with `t`.
-- @tparam table t table to copy
-- @treturn table the copy
local function shallow_copy(t)
    local copy = {}
    for key, value in pairs(t) do
        copy[key] = value
    end
    return copy
end
local M = {}
@ -35,10 +42,50 @@ local VOID_TAGS = {
function M.make_dom_element( tag_name, parent_elem )
local o = {
tag_name = tag_name,
parent = parent_elem,
children = {},
attributes = {},
content = ""
content = "",
children = {},
parent = parent_elem,
get_child_index = function( self )
if not self.parent then
return -1
end
for i, child in ipairs(self.parent.children) do
if child == self then return i end
end
end,
get_next_sibling = function( self )
if not self.parent then return nil end
local found_self = false
for _, child in ipairs(self.parent.children) do
if found_self then
return child
end
if child == self then
found_self = true
end
end
return nil
end,
check_simple_selector = function( self, selector )
return M.check_simple_selector( self, selector )
end,
foreach = function( self, fn )
fn( self )
for _, child in ipairs(self.children or {}) do
child:foreach( fn )
end
end
}
if parent_elem then
@ -260,6 +307,64 @@ function M.tokenise( content )
end
--- Test whether a single DOM node satisfies a compound selector.
--
-- @tparam table element node with `tag_name` and `attributes` fields
-- @tparam table selector compound selector with optional `tag_name`, `id`
--   and `class` (list of class names) fields; a `tag_name` of "*" is the
--   universal selector
-- @treturn boolean true when every part present in `selector` matches
function M.check_simple_selector(element, selector)
    -- Skip text nodes
    if element.tag_name == ":text" then
        return false
    end

    -- Check tag name if specified. "*" accepts any node that has a tag
    -- name; comparing it literally would reject every element.
    local wanted_tag = selector.tag_name
    if wanted_tag == "*" then
        if element.tag_name == nil then
            return false
        end
    elseif wanted_tag and element.tag_name ~= wanted_tag then
        return false
    end

    -- Check ID if specified
    if selector.id and element.attributes.id ~= selector.id then
        return false
    end

    -- Check classes if specified: every requested class must be present
    if selector.class and #selector.class > 0 then
        local element_classes = element.attributes.class
        if not element_classes then
            return false
        end

        -- Build a lookup set once instead of rescanning per requested class.
        -- A plain string is split on whitespace so a class attribute that
        -- was never converted to a list does not crash `ipairs`.
        local present = {}
        if type(element_classes) == "string" then
            for name in element_classes:gmatch("%S+") do
                present[name] = true
            end
        else
            for _, name in ipairs(element_classes) do
                present[name] = true
            end
        end

        for _, class in ipairs(selector.class) do
            if not present[class] then
                return false
            end
        end
    end

    return true
end
--- Collect every node in a tree that satisfies a compound selector.
--
-- Walks `document` and its descendants depth-first, parent before
-- children, testing each node with `M.check_simple_selector`.
--
-- @tparam table document root node; every node needs a `children` list
-- @tparam table selector compound selector to test against
-- @treturn table list of matching nodes in visiting order
function M.query_simple_selector(document, selector)
    local found = {}

    local visit
    visit = function(node)
        if M.check_simple_selector(node, selector) then
            found[#found + 1] = node
        end
        for _, kid in ipairs(node.children) do
            visit(kid)
        end
    end

    visit(document)
    return found
end
function M.parse_tokens_into_document( TOKENS )
local DOCUMENT = M.make_dom_element(nil, nil)
local current_doc_element = DOCUMENT
@ -270,7 +375,7 @@ function M.parse_tokens_into_document( TOKENS )
local token = TOKENS[i]
if token.type == "WORD" then
if current_doc_element.tag_name == "#text" then
if current_doc_element.tag_name == ":text" then
current_doc_element = current_doc_element.parent
end
@ -322,18 +427,21 @@ function M.parse_tokens_into_document( TOKENS )
end
local value = nil
if raw_value == "" or raw_value == nil then
value = nil
--elseif raw_value:find("%S+%s+%S+") then
-- value = {}
-- print(raw_value)
-- for word in raw_value:gmatch("%S+") do
-- table.insert( value, word )
-- end
else
value = trim(raw_value)
if name == "class" then
local classes = {}
for class in value:gmatch("%S+") do
table.insert( classes, class )
end
value = classes
end
end
current_doc_element.attributes[name] = value
@ -364,7 +472,7 @@ function M.parse_tokens_into_document( TOKENS )
if token.type == "TEXT" then
local new_elem = M.make_dom_element( "#text", current_doc_element )
local new_elem = M.make_dom_element( ":text", current_doc_element )
new_elem.content = token.value
current_doc_element = new_elem
@ -376,18 +484,20 @@ function M.parse_tokens_into_document( TOKENS )
i = i+1
end
M.clean_text_nodes( DOCUMENT )
return DOCUMENT
end
function M.clean_text_nodes(node)
if node.tag_name ~= "#text" then
if node.tag_name ~= ":text" then
-- Don't clean anything in raw text tags
if RAW_TEXT_TAGS[node.tag_name] then
return
end
for _, child in ipairs(node.children) do
for _, child in ipairs( shallow_copy(node.children) ) do
M.clean_text_nodes( child )
end
return
@ -399,7 +509,7 @@ function M.clean_text_nodes(node)
error("Text node without a parent; should be impossible !")
end
for i, child in ipairs(node.parent.children) do
for i, child in ipairs( shallow_copy(node.parent.children) ) do
if child == node then
table.remove( node.parent.children, i )
break
@ -413,47 +523,91 @@ function M.clean_text_nodes(node)
end
function M.print_document(node, indent)
function M.tostring(node, indent, include_internal_pseudoelements)
-- Default indentation is 0 (root level)
indent = indent or 0
include_internal_pseudoelements = include_internal_pseudoelements or false
local is_pseudo_element = (node.tag_name or ":root"):sub(1,1) == ":"
local indent_level_str = " "
-- Create the indentation string (e.g., " " for each level)
local indent_str = string.rep(indent_level_str, indent)
if node.tag_name == "#text" then
print(indent_str .. "<#text>\n" .. node.content .. "\n" .. indent_str .. "</#text>")
return
if node.tag_name == ":text" then
local str = ""
if include_internal_pseudoelements then
str = str .. "<:text>"
end
str = str .. node.content
if include_internal_pseudoelements then
str = str .. "</:text>"
end
return str
end
local node_name = ""
if not is_pseudo_element or include_internal_pseudoelements then
-- Print the current node's tag name
node_name = node_name .. indent_str .. "<" .. (node.tag_name or "#root")
node_name = node_name .. "\n" .. indent_str .. "<" .. (node.tag_name or ":root")
end
-- Print attributes if any
if next(node.attributes) ~= nil then
for attr, value in pairs(node.attributes) do
--print(indent_str .. " " .. attr .. " = " .. tostring(value))
if type(value) == "table" then
node_name = node_name .. " " .. attr .. "=\""
for i, val in ipairs( value ) do
if i > 1 then node_name = node_name .. " " end
node_name = node_name .. tostring(val)
end
node_name = node_name .. "\""
else
node_name = node_name .. " " .. attr .. "=\"" .. tostring(value) .. "\""
end
end
end
if not is_pseudo_element or include_internal_pseudoelements then
node_name = node_name .. ">"
end
print( node_name )
--print( node_name )
local next_indent = indent + 1
if is_pseudo_element and not include_internal_pseudoelements then
next_indent = indent
end
-- Recursively print children
for _, child in ipairs(node.children) do
M.print_document(child, indent + 1)
node_name = node_name .. M.tostring(child, next_indent, include_internal_pseudoelements)
end
if not VOID_TAGS[node.tag_name] and ( not is_pseudo_element or include_internal_pseudoelements ) then
-- Print the closing tag
print(indent_str .. "</" .. (node.tag_name or "#root") .. ">")
local end_indent = ""
local closing_text_tag = "</:text>"
if node_name:sub(#node_name, #node_name) == ">" and node_name:sub(#node_name - #closing_text_tag + 1, #node_name) ~= closing_text_tag then
end_indent = "\n" .. indent_str
end
node_name = node_name .. end_indent .. "</" .. (node.tag_name or ":root") .. ">"
end
return node_name
end
function M.parse( html_string )
local clean_html = M.preprocess( html_string )
@ -461,9 +615,7 @@ function M.parse( html_string )
local document = M.parse_tokens_into_document( tokens )
local cleaned_doc = M.clean_text_nodes( document )
return cleaned_doc
return document
end
return M

View File

@ -1,9 +1,10 @@
#!/bin/env lua
local html = require(".html")
local css = require(".css")
local file = io.open("test.html", "r")
local file = io.open("small.html", "r")
if file == nil then
error("File doesn't exist")
@ -11,4 +12,94 @@ end
local content = file:read("a")
html.print_document( html.parse( content ) )
local doc = html.parse( content )
-- Interactive query: read a selector from stdin, evaluate it against `doc`
-- one combinator at a time, then print every matching element.
print("Write a css selector:")

local selector_text = io.read()
if selector_text == nil then
    -- io.read() returns nil at end of input; fail with a clear message
    -- instead of crashing inside the parser.
    error("No selector provided")
end

local whole_selector = css.parse( selector_text )
local current_selector = whole_selector

local elements = {}
-- start with all elements matching the first selector
doc:foreach(function( el )
    if el:check_simple_selector( current_selector.selector ) then
        table.insert( elements, el )
    end
end)

while current_selector.combinator ~= nil do
    local combinator = current_selector.combinator
    local next_selector = current_selector.next

    local new_elements = {}
    -- Nodes already collected in this step. Two starting elements can lead
    -- to the same node (nested ancestors, shared siblings), and each match
    -- must be reported once.
    local seen = {}

    local function add_if_match( el )
        if not seen[el] and el:check_simple_selector( next_selector.selector ) then
            seen[el] = true
            table.insert( new_elements, el )
        end
    end

    for _, element in ipairs( elements ) do
        if combinator == css.COMBINATORS.DESCENDANT then
            -- Walk from the children: `foreach` visits its receiver first,
            -- and an element is not its own descendant.
            for _, child in ipairs( element.children ) do
                child:foreach( add_if_match )
            end

        elseif combinator == css.COMBINATORS.DIRECT_DESCENDANT then
            for _, child in ipairs( element.children ) do
                add_if_match( child )
            end

        elseif combinator == css.COMBINATORS.NEXT_SIBLING then
            -- Text nodes between two elements do not count as siblings here.
            local next_sibling = element:get_next_sibling()
            while next_sibling and next_sibling.tag_name == ":text" do
                next_sibling = next_sibling:get_next_sibling()
            end
            if next_sibling then
                add_if_match( next_sibling )
            end

        elseif combinator == css.COMBINATORS.SUBSEQUENT_SIBLING then
            local sibling = element:get_next_sibling()
            while sibling ~= nil do
                add_if_match( sibling )
                sibling = sibling:get_next_sibling()
            end
        end
    end

    elements = new_elements
    current_selector = next_selector
end

for _, el in ipairs(elements) do
    print( html.tostring( el ) )
end

60
small.html Normal file
View File

@ -0,0 +1,60 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>HTMLQ Test Document</title>
</head>
<body>
<div id="main" class="container wrapper">
<header class="header nav-bar">
<h1>HTMLQ Test Document</h1>
<nav class="nav" data-nav-type="main">
<ul>
<li><a href="#" data-link-type="internal">Home</a></li>
<li><a href="#" data-link-type="external">About</a></li>
<li><a href="#" data-link-type="internal">Contact</a></li>
</ul>
</nav>
</header>
<main class="content main-content">
<section class="section featured" id="featured-section">
<h2>Featured Section</h2>
<p>This is the featured section.</p>
<div class="card featured-card" data-card-type="featured">
<h3>Featured Card</h3>
<p>This is the featured card.</p>
<ul>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ul>
</div>
</section>
<section class="section regular" id="regular-section">
<h2>Regular Section</h2>
<p>This is the regular section.</p>
<div class="card regular-card" data-card-type="regular">
<h3>Regular Card</h3>
<p>This is the regular card.</p>
<ol>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ol>
</div>
</section>
</main>
<footer class="footer nav-bar" data-footer-type="main">
<p>&copy; 2024 HTMLQ Test Document</p>
<nav class="nav" data-nav-type="footer">
<ul>
<li><a href="#" data-link-type="internal">Home</a></li>
<li><a href="#" data-link-type="external">About</a></li>
<li><a href="#" data-link-type="internal">Contact</a></li>
</ul>
</nav>
</footer>
</div>
</body>
</html>