forked from msva/lua-htmlparser
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtmlparser.lua
More file actions
69 lines (60 loc) · 2.23 KB
/
htmlparser.lua
File metadata and controls
69 lines (60 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
local ElementNode = require("htmlparser.ElementNode")
local voidelements = require("htmlparser.voidelements")
-- Module table; the parse function is attached to it and it is returned at the end of the file.
local HtmlParser = {}
--- Scan an opening tag's text for attributes and store them on the node.
-- Reads the text returned by `tag:gettext()` and records every
-- whitespace-separated `key`, `key=value`, `key='value'` or `key="value"`
-- item through `tag:addattribute(key, value)`. A key without a value is
-- stored with the empty string.
-- @param tag element node providing `gettext()` and `addattribute(k, v)`
local function parseattributes(tag)
  local tagst, apos = tag:gettext(), 1
  while true do
    local _, kend, k, eq, quote = string.find(tagst,
      "%s+" ..         -- some uncaptured space
      "([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
      "(=?)" ..        -- eq = the optional "=", else ""
      "(['\"]?)",      -- quote = an optional "'" or '"' following the "=", or ""
      apos)
    -- The key class excludes "/" and ">", so k can never be "/>" or ">";
    -- a failed match is the only way to run out of attributes.
    if not k then break end
    apos = kend

    local v
    if eq == "=" then
      -- apos now sits on the "=" (unquoted value) or on the opening quote.
      local pattern = "=([^%s>]*)"
      if quote ~= "" then
        pattern = quote .. "([^" .. quote .. "]*)" .. quote
      end
      local vend
      _, vend, v = string.find(tagst, pattern, apos)
      if not vend then
        -- The closing quote is missing from the tag text (for instance when
        -- the quoted value itself contains ">"). Assigning the nil result to
        -- apos would restart the scan at position 1 and loop forever, so
        -- record the key with an empty value and stop: what is left is the
        -- inside of an unterminated string, not attribute syntax.
        tag:addattribute(k, "")
        break
      end
      apos = vend
    end
    tag:addattribute(k, v or "")
  end
end

--- Parse HTML text into a tree of ElementNode objects.
-- Walks the text tag by tag: every opening tag becomes a new node (with its
-- attributes), void elements are closed immediately, and each closing tag
-- that follows closes the most recently opened node of the same name.
-- @param text the HTML source to parse
-- @return the root node, created with `ElementNode:new(0, text)`
local function parse(text)
  local index = 0
  local root = ElementNode:new(index, text)
  -- node:     the node handed to ElementNode:new as context for the next tag
  -- descend:  flag handed to ElementNode:new; false right after a void
  --           element, true otherwise (presumably "nest inside node" versus
  --           "sibling of node" -- confirm in ElementNode)
  -- tpos:     position in root._text of the ">" ending the last opening tag
  -- opentags: per tag name, a stack of nodes that are still open
  local node, descend, tpos, opentags = root, true, 1, {}
  while true do
    local openstart, name
    openstart, tpos, name = string.find(root._text,
      "<" ..     -- an uncaptured starting "<"
      "(%w+)" .. -- name = the first word, directly following the "<"
      "[^>]*>",  -- include, but not capture everything up to the next ">"
      tpos)
    if not name then break end

    index = index + 1
    local tag = ElementNode:new(index, name, node, descend, openstart, tpos)
    node = tag
    parseattributes(tag)

    if voidelements[string.lower(tag.name)] then
      -- Void elements have no content and no closing tag: close right away.
      descend = false
      tag:close()
    else
      opentags[tag.name] = opentags[tag.name] or {}
      table.insert(opentags[tag.name], tag)
    end

    -- Consume every closing tag that appears before the next opening tag.
    local closeend = tpos
    while true do
      local closestart, closing, closename
      closestart, closeend, closing, closename = string.find(root._text, "[^<]*<(/?)(%w+)", closeend)
      -- Stop at the end of the text or when the next tag is an opening tag.
      if not closing or closing == "" then break end
      tag = table.remove(opentags[closename] or {}) or tag -- kludges for the cases of closing void or non-opened tags
      -- The match may begin in the text before the tag ("[^<]*"), so locate
      -- the "<" itself.
      closestart = string.find(root._text, "<", closestart)
      -- closeend is the last character of the tag name; + 1 assumes the ">"
      -- follows it directly.
      tag:close(closestart, closeend + 1)
      node = tag.parent
      descend = true
    end
  end
  return root
end
-- Expose the file-local parse function as the module's public entry point.
HtmlParser.parse = parse
return HtmlParser