fix: ignore empty words in tokenisation
parent
feb98ab5ab
commit
a8a295aaf1
6
html.lua
6
html.lua
|
@ -238,7 +238,9 @@ function M.tokenise( content )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if not word:match("^%s*$") then
|
||||||
table.insert( TOKENS, {type="WORD", value=word})
|
table.insert( TOKENS, {type="WORD", value=word})
|
||||||
|
end
|
||||||
else
|
else
|
||||||
table.insert( TOKENS, {type="TEXT", value=text_memory} )
|
table.insert( TOKENS, {type="TEXT", value=text_memory} )
|
||||||
end
|
end
|
||||||
|
@ -271,8 +273,10 @@ function M.tokenise( content )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if not word:match("^%s*$") then
|
||||||
table.insert( TOKENS, {type="WORD", value=word})
|
table.insert( TOKENS, {type="WORD", value=word})
|
||||||
text_memory = ""
|
text_memory = ""
|
||||||
|
end
|
||||||
|
|
||||||
goto continue
|
goto continue
|
||||||
end
|
end
|
||||||
|
@ -418,7 +422,7 @@ function M.parse_tokens_into_document( TOKENS )
|
||||||
name = token.value:match("([%w-]+)")
|
name = token.value:match("([%w-]+)")
|
||||||
|
|
||||||
if name == nil then
|
if name == nil then
|
||||||
error("Unrecognised word: " .. name)
|
error("Unrecognised word: " .. tostring(name) .. " (Token ".. tostring(i) .." , type=" .. tostring(token.type) .. ", value=" .. tostring(token.value) .. ")")
|
||||||
end
|
end
|
||||||
|
|
||||||
current_doc_element.attributes[name] = true
|
current_doc_element.attributes[name] = true
|
||||||
|
|
Loading…
Reference in New Issue