fix: spaces around '=' sign in html tag attributes
parent
5e8c023559
commit
83b1ac0c83
343
html.lua
343
html.lua
|
@ -293,44 +293,15 @@ function M.tokenise( content )
|
||||||
local in_tag = nil
|
local in_tag = nil
|
||||||
local currently_opened_quotes = nil
|
local currently_opened_quotes = nil
|
||||||
local text_memory = ""
|
local text_memory = ""
|
||||||
|
local attr_name = nil -- Track attribute name when we encounter whitespace before equals
|
||||||
local i = 1
|
local i = 1
|
||||||
|
|
||||||
while i <= #content do
|
while i <= #content do
|
||||||
local char = content:sub(i,i)
|
local char = content:sub(i, i)
|
||||||
|
|
||||||
|
|
||||||
--
|
|
||||||
-- Taking care of quotes
|
|
||||||
--
|
|
||||||
if in_tag then
|
|
||||||
-- finding matching quotes
|
|
||||||
if currently_opened_quotes ~= nil and char == currently_opened_quotes then
|
|
||||||
currently_opened_quotes = nil
|
|
||||||
text_memory = text_memory .. char
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Opening a new set of quotes
|
|
||||||
if currently_opened_quotes == nil and (char == "'" or char == '"') then
|
|
||||||
currently_opened_quotes = char
|
|
||||||
text_memory = text_memory .. char
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
|
|
||||||
-- reaching here means:
|
|
||||||
-- - we're in a tag, inside quotes
|
|
||||||
-- - the character is not the closing quote mark
|
|
||||||
-- So just add it and get on with it.
|
|
||||||
if currently_opened_quotes ~= nil then
|
|
||||||
text_memory = text_memory .. char
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Handle comments and doctype declarations
|
||||||
if char == "<" then
|
if char == "<" then
|
||||||
|
-- Handle comments
|
||||||
if content:sub(i, i+3) == "<!--" then
|
if content:sub(i, i+3) == "<!--" then
|
||||||
local end_i = content:find("-->", i+3, true)
|
local end_i = content:find("-->", i+3, true)
|
||||||
if end_i then
|
if end_i then
|
||||||
|
@ -338,66 +309,64 @@ function M.tokenise( content )
|
||||||
else
|
else
|
||||||
i = #content
|
i = #content
|
||||||
end
|
end
|
||||||
|
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle doctype declarations
|
||||||
if content:sub(i, i+1) == "<!" then
|
if content:sub(i, i+1) == "<!" then
|
||||||
i = content:find(">", i, true)
|
i = content:find(">", i, true)
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
|
||||||
---------------------------------
|
-- Save any accumulated text before starting a new tag
|
||||||
if #text_memory ~= 0 then
|
if #text_memory ~= 0 then
|
||||||
table.insert( TOKENS, {type="TEXT", value=text_memory} )
|
table.insert(TOKENS, {type="TEXT", value=text_memory})
|
||||||
text_memory = ""
|
text_memory = ""
|
||||||
end
|
end
|
||||||
|
|
||||||
-- closing tag
|
-- Reset attribute tracking
|
||||||
|
attr_name = nil
|
||||||
|
|
||||||
|
-- Handle closing tags
|
||||||
if content:sub(i, i+1) == "</" then
|
if content:sub(i, i+1) == "</" then
|
||||||
table.insert( TOKENS, {type="START_CLOSING_TAG"} )
|
table.insert(TOKENS, {type="START_CLOSING_TAG"})
|
||||||
in_tag = "closing"
|
in_tag = "closing"
|
||||||
i = i+1
|
i = i+1
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
|
||||||
table.insert( TOKENS, {type="START_OPENING_TAG"} )
|
-- Handle opening tags
|
||||||
|
table.insert(TOKENS, {type="START_OPENING_TAG"})
|
||||||
in_tag = "opening"
|
in_tag = "opening"
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle end of tag
|
||||||
if char == ">" and in_tag then
|
if char == ">" and in_tag and currently_opened_quotes == nil then
|
||||||
-- first, cleanup the text_memory, as the closing > is often side-by-side with the last "word"
|
-- Process any remaining text in the tag
|
||||||
if #text_memory ~= 0 then
|
if #text_memory ~= 0 then
|
||||||
local word = trim(text_memory)
|
local word = trim(text_memory)
|
||||||
if not word:match("^%s*$") then
|
if not word:match("^%s*$") then
|
||||||
table.insert( TOKENS, {type="WORD", value=word})
|
table.insert(TOKENS, {type="WORD", value=word})
|
||||||
end
|
end
|
||||||
text_memory = ""
|
text_memory = ""
|
||||||
end
|
end
|
||||||
|
|
||||||
table.insert( TOKENS, {type = "END_TAG"} )
|
-- Reset attribute tracking
|
||||||
|
attr_name = nil
|
||||||
|
|
||||||
-- closing tags don't require any more work.
|
table.insert(TOKENS, {type="END_TAG"})
|
||||||
if in_tag == "closing" then
|
|
||||||
in_tag = nil
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
in_tag = nil
|
|
||||||
|
|
||||||
|
-- Handle special tags with raw content
|
||||||
|
if in_tag == "opening" then
|
||||||
local curr_token = #TOKENS
|
local curr_token = #TOKENS
|
||||||
while curr_token > 0 and TOKENS[curr_token].type ~= "START_OPENING_TAG" do
|
while curr_token > 0 and TOKENS[curr_token].type ~= "START_OPENING_TAG" do
|
||||||
curr_token = curr_token - 1
|
curr_token = curr_token - 1
|
||||||
end
|
end
|
||||||
curr_token = curr_token + 1
|
curr_token = curr_token + 1
|
||||||
|
|
||||||
if curr_token == 1 and TOKENS[curr_token].type ~= "START_OPENING_TAG" then
|
if curr_token <= #TOKENS and TOKENS[curr_token].type == "WORD" then
|
||||||
error("Error: Reached start of token stream while winding back to find tag name; Not supposed to be possible.")
|
|
||||||
end
|
|
||||||
|
|
||||||
local tagname = TOKENS[curr_token].value
|
local tagname = TOKENS[curr_token].value
|
||||||
|
|
||||||
if RAW_TEXT_TAGS[tagname] then
|
if RAW_TEXT_TAGS[tagname] then
|
||||||
local end_tag = (content:find("</"..tagname, i, true) or 0) - 1
|
local end_tag = (content:find("</"..tagname, i, true) or 0) - 1
|
||||||
if end_tag < 1 then
|
if end_tag < 1 then
|
||||||
|
@ -405,199 +374,121 @@ function M.tokenise( content )
|
||||||
print(content:sub(i))
|
print(content:sub(i))
|
||||||
os.exit(-5)
|
os.exit(-5)
|
||||||
end
|
end
|
||||||
|
|
||||||
local text_content = content:sub(i+1, end_tag)
|
local text_content = content:sub(i+1, end_tag)
|
||||||
|
-- Special handling of pre
|
||||||
-- special handling of pre
|
|
||||||
if tagname == "pre" then
|
if tagname == "pre" then
|
||||||
-- check if it "looks" like HTML
|
-- Check if it "looks" like HTML
|
||||||
if text_content:find("<", 1, true) and text_content:find(">", 1, true) then
|
if text_content:find("<", 1, true) and text_content:find(">", 1, true) then
|
||||||
-- tokenise the inner text
|
-- Tokenise the inner text
|
||||||
local text_tokens = M.tokenise( text_content )
|
local text_tokens = M.tokenise(text_content)
|
||||||
|
-- Add it to the current token list
|
||||||
-- and add it to the current token list
|
|
||||||
for _, tok in ipairs(text_tokens) do
|
for _, tok in ipairs(text_tokens) do
|
||||||
table.insert( TOKENS, tok )
|
table.insert(TOKENS, tok)
|
||||||
end
|
end
|
||||||
|
|
||||||
i = end_tag
|
i = end_tag
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
-- treat the rest as text
|
-- Treat the rest as text
|
||||||
|
|
||||||
i = end_tag
|
i = end_tag
|
||||||
table.insert( TOKENS, {type="TEXT", value=text_content} )
|
table.insert(TOKENS, {type="TEXT", value=text_content})
|
||||||
|
goto continue
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
in_tag = nil
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle content within tags
|
||||||
|
|
||||||
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
----------------------------------------------------
|
|
||||||
--- "OLD", UNCHECKED CODE
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- if char == ">" and in_tag and currently_opened_quotes == nil then
|
|
||||||
-- if #text_memory ~= 0 then
|
|
||||||
-- local word = trim(text_memory)
|
|
||||||
-- if not word:match("^%s*$") then
|
|
||||||
-- table.insert( TOKENS, {type="WORD", value=word})
|
|
||||||
-- end
|
|
||||||
-- text_memory = ""
|
|
||||||
-- end
|
|
||||||
|
|
||||||
|
|
||||||
-- table.insert( TOKENS, {type = "END_TAG"} )
|
|
||||||
|
|
||||||
|
|
||||||
-- local curr_token = #TOKENS
|
|
||||||
-- while curr_token > 0 and (TOKENS[curr_token].type ~= "START_OPENING_TAG" or TOKENS[curr_token].type ~= "START_CLOSING_TAG") do
|
|
||||||
-- curr_token = curr_token - 1
|
|
||||||
-- end
|
|
||||||
-- curr_token = curr_token + 1
|
|
||||||
-- if curr_token == 1 and TOKENS[curr_token].type ~= "START_OPENING_TAG" and TOKENS[curr_token].type ~= "START_CLOSING_TAG" then
|
|
||||||
-- error("Error: Reached start of token stream while winding back to find tag name; Not supposed to be possible.")
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- if TOKENS[curr_token].type == "START_CLOSING_TAG" then
|
|
||||||
-- goto continue
|
|
||||||
-- end
|
|
||||||
|
|
||||||
|
|
||||||
-- local tagname = TOKENS[curr_token+1].value
|
|
||||||
|
|
||||||
-- if RAW_TEXT_TAGS[tagname] then
|
|
||||||
-- logger.printerr("Warning: "..tagname.." tags may contain text that would be incorrectly parsed as HTML.")
|
|
||||||
|
|
||||||
-- print(content:sub(1,i-1))
|
|
||||||
-- print(("="):rep(40))
|
|
||||||
-- print(content:sub(i))
|
|
||||||
|
|
||||||
-- local end_tag = content:find("</"..tagname, i, true) - 1
|
|
||||||
-- local text_content = content:sub(i+1, end_tag)
|
|
||||||
|
|
||||||
-- if tagname == "pre" and false then
|
|
||||||
-- -- check if it "looks" like HTML
|
|
||||||
-- if text_content:find("<", 1, true) and text_content:find(">", 1, true) then
|
|
||||||
-- -- tokenise the inner text
|
|
||||||
-- local text_tokens = M.tokenise( text_content )
|
|
||||||
|
|
||||||
-- -- and add it to the current token list
|
|
||||||
-- for _, tok in ipairs(text_tokens) do
|
|
||||||
-- if tok.value == nil then
|
|
||||||
-- print( "\t::: " .. tok.type )
|
|
||||||
-- else
|
|
||||||
-- print( "\t::: " .. tok.type .. ": " .. tostring(tok.value) )
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- table.insert( TOKENS, tok )
|
|
||||||
-- end
|
|
||||||
-- else
|
|
||||||
-- -- treat it as text
|
|
||||||
-- table.insert( TOKENS, {type="TEXT", value=text_content} )
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- i = end_tag
|
|
||||||
-- end
|
|
||||||
|
|
||||||
|
|
||||||
-- in_tag = false
|
|
||||||
-- goto continue
|
|
||||||
-- end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- if #text_memory ~= 0 then
|
|
||||||
-- if in_tag and currently_opened_quotes == nil then
|
|
||||||
-- local word = trim(text_memory)
|
|
||||||
|
|
||||||
-- if TOKENS[#TOKENS] and ( TOKENS[#TOKENS].type == "START_OPENING_TAG") then
|
|
||||||
-- if RAW_TEXT_TAGS[word] then
|
|
||||||
-- logger.printerr("Warning: "..word.." tags may contain text that would be incorrectly parsed as HTML.")
|
|
||||||
-- -- made possible because of the whitespace removal at the start
|
|
||||||
-- i = content:find("</"..word, i, true) - 1
|
|
||||||
-- end
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- if not word:match("^%s*$") then
|
|
||||||
-- table.insert( TOKENS, {type="WORD", value=word})
|
|
||||||
-- end
|
|
||||||
-- else
|
|
||||||
-- table.insert( TOKENS, {type="TEXT", value=text_memory} )
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- text_memory = ""
|
|
||||||
-- end
|
|
||||||
|
|
||||||
-- in_tag = false
|
|
||||||
-- table.insert( TOKENS, {type = "END_TAG"} )
|
|
||||||
|
|
||||||
-- goto continue
|
|
||||||
-- end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if in_tag then
|
if in_tag then
|
||||||
if currently_opened_quotes == nil and char:match("%s") then
|
-- Handle quoted content
|
||||||
if #text_memory ~= 0 then
|
if currently_opened_quotes ~= nil then
|
||||||
local word = trim(text_memory)
|
if char == currently_opened_quotes then
|
||||||
|
-- End of quoted section
|
||||||
-- if TOKENS[#TOKENS] and ( TOKENS[#TOKENS].type == "START_OPENING_TAG" ) then
|
|
||||||
-- if RAW_TEXT_TAGS[word] then
|
|
||||||
-- logger.printerr("Warning: "..word.." tags may contain text that would be incorrectly parsed as HTML.")
|
|
||||||
-- text_memory = ""
|
|
||||||
|
|
||||||
-- -- advance to closing ">"
|
|
||||||
-- i = content:find(">", i, true)
|
|
||||||
-- -- made possible because of the whitespace removal at the start
|
|
||||||
-- i = content:find("</"..word, i, true) - 1
|
|
||||||
-- end
|
|
||||||
-- end
|
|
||||||
|
|
||||||
if not word:match("^%s*$") then
|
|
||||||
table.insert( TOKENS, {type="WORD", value=word})
|
|
||||||
text_memory = ""
|
|
||||||
end
|
|
||||||
|
|
||||||
goto continue
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
-- if char == "'" or char == '"' then
|
|
||||||
-- -- found matching closing quote type
|
|
||||||
-- if char == currently_opened_quotes then
|
|
||||||
-- currently_opened_quotes = nil
|
|
||||||
-- elseif currently_opened_quotes == nil then
|
|
||||||
-- currently_opened_quotes = char
|
|
||||||
-- end
|
|
||||||
-- end
|
|
||||||
|
|
||||||
text_memory = text_memory .. char
|
text_memory = text_memory .. char
|
||||||
goto continue
|
currently_opened_quotes = nil
|
||||||
|
else
|
||||||
|
-- Continue collecting quoted content
|
||||||
|
text_memory = text_memory .. char
|
||||||
|
end
|
||||||
|
else
|
||||||
|
-- Start of quoted section
|
||||||
|
if char == "'" or char == '"' then
|
||||||
|
text_memory = text_memory .. char
|
||||||
|
currently_opened_quotes = char
|
||||||
|
-- Handle equals sign
|
||||||
|
elseif char == "=" then
|
||||||
|
-- If we have an attribute name saved and empty text_memory, this is an equals after whitespace
|
||||||
|
if attr_name and #text_memory == 0 then
|
||||||
|
text_memory = attr_name .. "="
|
||||||
|
attr_name = nil
|
||||||
|
else
|
||||||
|
text_memory = text_memory .. "="
|
||||||
|
end
|
||||||
|
-- Handle whitespace in tags
|
||||||
|
elseif char:match("%s") then
|
||||||
|
-- If we have text and it doesn't end with =, it might be an attribute name
|
||||||
|
if #text_memory > 0 then
|
||||||
|
-- Check if the next non-whitespace char is an equals sign
|
||||||
|
local next_pos = i + 1
|
||||||
|
while next_pos <= #content do
|
||||||
|
local next_char = content:sub(next_pos, next_pos)
|
||||||
|
if not next_char:match("%s") then
|
||||||
|
if next_char == "=" then
|
||||||
|
-- This is an attribute name followed by whitespace and equals
|
||||||
|
attr_name = text_memory
|
||||||
|
text_memory = ""
|
||||||
|
else
|
||||||
|
-- This is a complete word
|
||||||
|
local word = trim(text_memory)
|
||||||
|
if not word:match("^%s*$") then
|
||||||
|
table.insert(TOKENS, {type="WORD", value=word})
|
||||||
|
end
|
||||||
|
text_memory = ""
|
||||||
|
attr_name = nil
|
||||||
|
end
|
||||||
|
break
|
||||||
|
end
|
||||||
|
next_pos = next_pos + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
-- If we reached the end of the content
|
||||||
|
if next_pos > #content then
|
||||||
|
local word = trim(text_memory)
|
||||||
|
if not word:match("^%s*$") then
|
||||||
|
table.insert(TOKENS, {type="WORD", value=word})
|
||||||
|
end
|
||||||
|
text_memory = ""
|
||||||
|
attr_name = nil
|
||||||
|
end
|
||||||
|
end
|
||||||
else
|
else
|
||||||
text_memory = text_memory .. char
|
text_memory = text_memory .. char
|
||||||
goto continue
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
|
-- We're not in a tag, so collect text content
|
||||||
|
text_memory = text_memory .. char
|
||||||
|
end
|
||||||
|
|
||||||
::continue::
|
::continue::
|
||||||
i = i+1
|
i = i + 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle any remaining text
|
||||||
|
if #text_memory > 0 then
|
||||||
|
if in_tag then
|
||||||
|
local word = trim(text_memory)
|
||||||
|
if not word:match("^%s*$") then
|
||||||
|
table.insert(TOKENS, {type="WORD", value=word})
|
||||||
|
end
|
||||||
|
else
|
||||||
|
table.insert(TOKENS, {type="TEXT", value=text_memory})
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
return TOKENS
|
return TOKENS
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue