From 68ca520c1ebec008ebf0e8c9464067633985dadc Mon Sep 17 00:00:00 2001 From: Aleksei Konovkin Date: Fri, 15 Dec 2023 13:19:07 +0300 Subject: [PATCH] memory fixes --- test/test-html-build.lua | 6 +-- xmlua/attribute-declaration.lua | 2 +- xmlua/attribute.lua | 2 +- xmlua/cdata-section.lua | 2 +- xmlua/comment.lua | 2 +- xmlua/document-type.lua | 2 +- xmlua/document.lua | 65 ++++++++++++++--------- xmlua/element-declaration.lua | 2 +- xmlua/element.lua | 92 +++++++++++++++++---------------- xmlua/libxml2.lua | 51 +++++++++++++++--- xmlua/libxml2/tree.lua | 1 + xmlua/node.lua | 6 ++- xmlua/searchable.lua | 9 +++- xmlua/serializable.lua | 2 +- xmlua/xml.lua | 5 +- 15 files changed, 159 insertions(+), 90 deletions(-) diff --git a/test/test-html-build.lua b/test/test-html-build.lua index ad888b2c..8c7818a7 100644 --- a/test/test-html-build.lua +++ b/test/test-html-build.lua @@ -26,7 +26,7 @@ function TestHTMLBuild.test_empty_root_dtd_systemid() local uri = "file:///usr/local/share/test.dtd" local document = HTML.build({"html"}, uri) luaunit.assertEquals({ - ffi.string(document.document.intSubset.SystemID), + ffi.string(document.raw_document.intSubset.SystemID), document:to_html() }, { @@ -43,8 +43,8 @@ function TestHTMLBuild.test_empty_root_dtd_publicid() local public_id = "-//W3C//DTD HTML 4.01//EN" local document = HTML.build({"html"}, uri, public_id) luaunit.assertEquals({ - ffi.string(document.document.intSubset.SystemID), - ffi.string(document.document.intSubset.ExternalID), + ffi.string(document.raw_document.intSubset.SystemID), + ffi.string(document.raw_document.intSubset.ExternalID), document:to_html() }, { diff --git a/xmlua/attribute-declaration.lua b/xmlua/attribute-declaration.lua index db993ad1..5bc14152 100644 --- a/xmlua/attribute-declaration.lua +++ b/xmlua/attribute-declaration.lua @@ -13,7 +13,7 @@ end function AttributeDeclaration.new(document, node) local attribute_declaration = { document = document, - node = node, + node = node } setmetatable(attribute_declaration, metatable) return attribute_declaration diff --git a/xmlua/attribute.lua b/xmlua/attribute.lua index 00b4ab58..780fb59f 100644 --- a/xmlua/attribute.lua +++ b/xmlua/attribute.lua @@ -32,7 +32,7 @@ end function Attribute.new(document, node) local attr = { document = document, - node = node, + node = node } setmetatable(attr, metatable) return attr diff --git a/xmlua/cdata-section.lua b/xmlua/cdata-section.lua index b791900f..44bd4454 100644 --- a/xmlua/cdata-section.lua +++ b/xmlua/cdata-section.lua @@ -13,7 +13,7 @@ end function CDATASection.new(document, node) local cdata_section = { document = document, - node = node, + node = node } setmetatable(cdata_section, metatable) return cdata_section diff --git a/xmlua/comment.lua b/xmlua/comment.lua index 962b7c2e..ab080d45 100644 --- a/xmlua/comment.lua +++ b/xmlua/comment.lua @@ -13,7 +13,7 @@ end function Comment.new(document, node) local comment = { document = document, - node = node, + node = node } setmetatable(comment, metatable) return comment diff --git a/xmlua/document-type.lua b/xmlua/document-type.lua index 2723e25d..55267931 100644 --- a/xmlua/document-type.lua +++ b/xmlua/document-type.lua @@ -26,7 +26,7 @@ end function DocumentType.new(document, node) local document_type = { document = document, - node = node, + node = node } setmetatable(document_type, metatable) return document_type diff --git a/xmlua/document.lua b/xmlua/document.lua index f3e43de6..4b655f2d 100644 --- a/xmlua/document.lua +++ b/xmlua/document.lua @@ -3,7 +3,6 @@ local Document = {} local libxml2 = require("xmlua.libxml2") local ffi = require("ffi") local converter = require("xmlua.converter") -local to_string = converter.to_string local Serializable = require("xmlua.serializable") local Searchable = require("xmlua.searchable") @@ -29,6 +28,14 @@ function Document.lazy_load() ProcessingInstruction = require("xmlua.processing-instruction") end +local function print_dbg(...) + -- if ngx then + -- ngx.log(ngx.DEBUG, ...) + -- else + -- print(...) + -- end +end + local methods = {} local metatable = {} @@ -40,11 +47,11 @@ function metatable.__index(document, key) end function methods:root() - local root_element = libxml2.xmlDocGetRootElement(self.document) - if not root_element then + local node = libxml2.xmlDocGetRootElement(self.raw_document) + if not node then return nil end - return Element.new(self.document, root_element) + return Element.new(self, node) end function methods:parent() @@ -52,42 +59,42 @@ function methods:parent() end function methods:encoding() - return ffi.string(self.document.encoding) + return ffi.string(self.raw_document.encoding) end function methods:create_cdata_section(data) local raw_cdata_section_node = - libxml2.xmlNewCDataBlock(self.document, + libxml2.xmlNewCDataBlock(self.raw_document, data, data:len()) - return CDATASection.new(self.document, raw_cdata_section_node) + return CDATASection.new(self, raw_cdata_section_node) end function methods:create_comment(data) local raw_comment_node = libxml2.xmlNewComment(data) - return Comment.new(self.document, raw_comment_node) + return Comment.new(self, raw_comment_node) end function methods:create_document_fragment() local raw_document_fragment_node = - libxml2.xmlNewDocFragment(self.document) - return DocumentFragment.new(self.document, + libxml2.xmlNewDocFragment(self.raw_document) + return DocumentFragment.new(self, raw_document_fragment_node) end function methods:create_document_type(name, external_id, system_id) local raw_document_type = - libxml2.xmlCreateIntSubset(self.document, name, external_id, system_id) - return DocumentType.new(self.document, + libxml2.xmlCreateIntSubset(self.raw_document, name, external_id, system_id) + return DocumentType.new(self, raw_document_type) end function methods:get_internal_subset() local raw_document_type = - libxml2.xmlGetIntSubset(self.document) + libxml2.xmlGetIntSubset(self.raw_document) if raw_document_type ~= nil then - return DocumentType.new(self.document, + return DocumentType.new(self, raw_document_type) else return nil @@ -96,28 +103,28 @@ end function methods:add_entity_reference(name) local raw_entity_reference = - libxml2.xmlNewReference(self.document, name) - return EntityReference.new(self.document, + libxml2.xmlNewReference(self.raw_document, name) + return EntityReference.new(self, raw_entity_reference) end function methods:create_namespace(href, prefix) local raw_namespace = libxml2.xmlNewNs(self.node, href, prefix) - return Namespace.new(self.document, raw_namespace) + return Namespace.new(self, raw_namespace) end function methods:create_processing_instruction(name, content) local raw_processing_instruction = libxml2.xmlNewPI(name, content) - return ProcessingInstruction.new(self.document, + return ProcessingInstruction.new(self, raw_processing_instruction) end function methods:add_entity(entity_info) local entity_type_name = entity_info["entity_type"] local entity_type = converter.convert_entity_type_name(entity_type_name) - local raw_entity = libxml2.xmlAddDocEntity(self.document, + local raw_entity = libxml2.xmlAddDocEntity(self.raw_document, entity_info["name"], entity_type, entity_info["external_id"], @@ -127,14 +134,14 @@ function methods:add_entity(entity_info) end function methods:get_entity(name) - local raw_entity = libxml2.xmlGetDocEntity(self.document, name) + local raw_entity = libxml2.xmlGetDocEntity(self.raw_document, name) return converter.convert_xml_entity(raw_entity) end function methods:add_dtd_entity(entity_info) local entity_type_name = entity_info["entity_type"] local entity_type = converter.convert_entity_type_name(entity_type_name) - local raw_dtd_entity = libxml2.xmlAddDtdEntity(self.document, + local raw_dtd_entity = libxml2.xmlAddDtdEntity(self.raw_document, entity_info["name"], entity_type, entity_info["external_id"], @@ -144,7 +151,7 @@ function methods:add_dtd_entity(entity_info) end function methods:get_dtd_entity(name) - local raw_dtd_entity = libxml2.xmlGetDtdEntity(self.document, name) + local raw_dtd_entity = libxml2.xmlGetDtdEntity(self.raw_document, name) return converter.convert_xml_entity(raw_dtd_entity) end @@ -167,7 +174,7 @@ function Document.build(raw_document, tree) local root = Element.build(document, tree[1], tree[2]) if not libxml2.xmlDocSetRootElement(raw_document, root.node) then - root:unlink() + --root:unlink() return nil end @@ -186,10 +193,20 @@ function Document.new(raw_document, errors) if not errors then errors = {} end + local unlinked = {} local document = { - document = raw_document, + raw_document = raw_document, errors = errors, + unlinked = unlinked } + ffi.gc(document.raw_document, function(pdocument) + for node in pairs(unlinked) do + print_dbg("Free unlinked: ", node) + libxml2.xmlFreeNode(node) + end + print_dbg("xmlFreeDoc: ", pdocument) + libxml2.xmlFreeDoc(pdocument) + end) setmetatable(document, metatable) return document end diff --git a/xmlua/element-declaration.lua b/xmlua/element-declaration.lua index a954b576..570fec7b 100644 --- a/xmlua/element-declaration.lua +++ b/xmlua/element-declaration.lua @@ -13,7 +13,7 @@ end function ElementDeclaration.new(document, node) local element_declaration = { document = document, - node = node, + node = node } setmetatable(element_declaration, metatable) return element_declaration diff --git a/xmlua/element.lua b/xmlua/element.lua index 8afebfac..e00beebd 100644 --- a/xmlua/element.lua +++ b/xmlua/element.lua @@ -88,7 +88,7 @@ local function remove_namespace(node, prefix) end end - namespace = node.nsDef + local namespace = node.nsDef local namespace_previous = nil while namespace ~= ffi.NULL do if is_target_namespace(namespace) then @@ -127,65 +127,69 @@ local function set_attributes(element, attributes) end end -local function create_sub_element(document, node, name, attributes) +local function create_sub_element(document, parent, name, attributes) local namespace_prefix, local_name = parse_name(name) - local raw_element = libxml2.xmlNewNode(nil, local_name) - local element = Element.new(document, raw_element) + local node = libxml2.xmlNewNode(nil, local_name) + local element = Element.new(document, node) set_attributes(element, attributes) - local namespace = libxml2.xmlSearchNs(document, raw_element, namespace_prefix) - if not namespace and node then - namespace = libxml2.xmlSearchNs(document, node, namespace_prefix) + local namespace = libxml2.xmlSearchNs(document.raw_document, node, namespace_prefix) + if not namespace and parent then + namespace = libxml2.xmlSearchNs(document.raw_document, parent, namespace_prefix) end if namespace then - libxml2.xmlSetNs(raw_element, namespace) + libxml2.xmlSetNs(node, namespace) elseif namespace_prefix then element:unlink() - raw_element = libxml2.xmlNewNode(nil, name) - element = Element.new(document, raw_element) + node = libxml2.xmlNewNode(nil, name) + element = Element.new(document, node) set_attributes(element, attributes) end return element end -function methods:add_child(node) - if node.node.parent ~= ffi.NULL then - node:unlink() - end - local raw_added_node = - libxml2.xmlAddChild(self.node, node.node) - if raw_added_node ~= ffi.NULL then - ffi.gc(node.node, nil) +function methods:add_child(element) + local node + if self.document == element.document then + if element.node.parent ~= ffi.NULL then + element:unlink() + end + node = element.node + -- node back to the document but to the another place + element.document.unlinked[node] = nil + else + node = libxml2.xmlDocCopyNode(element.node, self.document.raw_document) end + libxml2.xmlAddChild(self.node, node) end -function methods:add_previous_sibling(node) - if not self.node and not node.node then +function methods:add_previous_sibling(element) + if not self.node and not element.node then error("Already freed receiver node and added node") elseif not self.node then error("Already freed receiver node") - elseif not node.node then + elseif not element.node then error("Already freed added node") end local raw_added_node, was_freed = - libxml2.xmlAddPrevSibling(self.node, node.node) + libxml2.xmlAddPrevSibling(self.node, element.node) if was_freed then - node.node = nil + element.node = nil end end -function methods:append_sibling(node) - if not self.node and not node.node then +function methods:append_sibling(element) + if not self.node and not element.node then error("Already freed receiver node and appended node") elseif not self.node then error("Already freed receiver node") - elseif not node.node then + elseif not element.node then error("Already freed appended node") end - local was_freed = libxml2.xmlAddSibling(self.node, node.node) + local was_freed = libxml2.xmlAddSibling(self.node, element.node) if was_freed then - node.node = nil + element.node = nil end end @@ -256,28 +260,28 @@ function methods:insert_element(position, name, attributes) end function methods:unlink() - local unlinked_node = Node.unlink(self) - return Element.new(nil, unlinked_node) + Node.unlink(self) + return self end function methods:get_attribute(name) local namespace_prefix, local_name = parse_name(name) if namespace_prefix == "xmlns" then - local namespace = libxml2.xmlSearchNs(self.document, self.node, local_name) + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, local_name) if namespace then return ffi.string(namespace.href) else return nil end elseif namespace_prefix == ffi.NULL and local_name == "xmlns" then - local namespace = libxml2.xmlSearchNs(self.document, self.node, nil) + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, nil) if namespace then return ffi.string(namespace.href) else return nil end elseif namespace_prefix then - local namespace = libxml2.xmlSearchNs(self.document, + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -298,7 +302,7 @@ function methods:set_attribute(name, value) local namespace_prefix, local_name = parse_name(name) local namespace if namespace_prefix == "xmlns" then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, local_name) if namespace then @@ -308,7 +312,7 @@ function methods:set_attribute(name, value) libxml2.xmlNewNs(self.node, value, local_name) end elseif namespace_prefix == nil and local_name == "xmlns" then - namespace = libxml2.xmlSearchNs(self.document, self.node, nil) + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, nil) if namespace then libxml2.xmlFree(ffi.cast("void *", namespace.href)) namespace.href = libxml2.xmlStrdup(value) @@ -317,7 +321,7 @@ function methods:set_attribute(name, value) set_default_namespace(self.node, namespace) end elseif namespace_prefix then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -341,7 +345,7 @@ function methods:remove_attribute(name) elseif namespace_prefix == nil and local_name == "xmlns" then remove_namespace(self.node, nil) elseif namespace_prefix then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -378,12 +382,12 @@ function methods:next() end function methods:root() - return Document.new(self.document):root() + return self.document:root() end function methods:parent() if tonumber(self.node.parent.type) == ffi.C.XML_DOCUMENT_NODE then - return Document.new(self.document) + return self.document else return Element.new(self.document, self.node.parent) end @@ -404,7 +408,7 @@ function methods:text() end function methods:namespaces() - local raw_namespaces = libxml2.xmlGetNsList(self.document, self.node) + local raw_namespaces = libxml2.xmlGetNsList(self.document.raw_document, self.node) if not raw_namespaces then return nil end @@ -427,23 +431,23 @@ function methods:find_namespace(prefix, href) local raw_namespace if not prefix and href then raw_namespace = - libxml2.xmlSearchNsByHref(self.document, self.node, href) + libxml2.xmlSearchNsByHref(self.document.raw_document, self.node, href) else raw_namespace = - libxml2.xmlSearchNs(self.document, self.node, prefix) + libxml2.xmlSearchNs(self.document.raw_document, self.node, prefix) end return Namespace.new(self.document, raw_namespace) end -- For internal use function Element.build(document, name, attributes) - return create_sub_element(document.node, nil, name, attributes) + return create_sub_element(document, nil, name, attributes) end function Element.new(document, node) local element = { document = document, - node = node, + node = node } setmetatable(element, metatable) return element diff --git a/xmlua/libxml2.lua b/xmlua/libxml2.lua index 366b5bda..9e1687a2 100644 --- a/xmlua/libxml2.lua +++ b/xmlua/libxml2.lua @@ -16,6 +16,14 @@ require("xmlua.libxml2.xmlsave") require("xmlua.libxml2.xpath") require("xmlua.libxml2.entities") +local function print_dbg(...) + -- if ngx then + -- ngx.log(ngx.DEBUG, ...) + -- else + -- print(...) + -- end +end + local ffi = require("ffi") local loaded, xml2 = pcall(ffi.load, "xml2") if not loaded then @@ -116,7 +124,11 @@ function libxml2.htmlCtxtReadMemory(context, html, options) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("htmlCtxtReadMemory: ", document) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p) + xml2.xmlFreeNode(p) + end) end jit.off(libxml2.htmlCtxtReadMemory) @@ -133,7 +145,11 @@ function libxml2.xmlNewParserCtxt() if context == ffi.NULL then return nil end - return ffi.gc(context, xml2.xmlFreeParserCtxt) + print_dbg("xmlNewParserCtxt: ", context) + return ffi.gc(context, function(p) + print_dbg("xmlFreeParserCtxt: ", p) + xml2.xmlFreeParserCtxt(p) + end) end function libxml2.xmlCreatePushParserCtxt(filename) @@ -197,7 +213,12 @@ function libxml2.xmlCtxtReadMemory(context, xml, options) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("xmlCtxtReadMemory: ", document, ", ctx=", context) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p, ", ctx=", context) + xml2.xmlFreeDoc(p) + end) + end jit.off(libxml2.xmlCtxtReadMemory) @@ -289,7 +310,11 @@ function libxml2.xmlNewDoc(xml_version) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("xmlNewDoc: ", document) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p) + xml2.xmlFreeDoc(p) + end) end function libxml2.xmlDocSetRootElement(document, root) @@ -572,10 +597,17 @@ end function libxml2.xmlUnlinkNode(node) xml2.xmlUnlinkNode(node) - xml2.xmlSetTreeDoc(node, ffi.NULL) - return ffi.gc(node, xml2.xmlFreeNode) + print_dbg("xmlUnlinkNode: ", node, ", doc=", node.doc) +end + +function libxml2.xmlDocCopyNode(node, doc) + return xml2.xmlDocCopyNode(node, doc, 2) end +function libxml2.xmlFreeNode(node) + print_dbg("xmlFreeNode: ", node, ", doc=", node.doc) + xml2.xmlFreeNode(node) +end function libxml2.xmlBufferCreate() return ffi.gc(xml2.xmlBufferCreate(), xml2.xmlBufferFree) @@ -643,10 +675,15 @@ function libxml2.xmlXPathEvalExpression(expression, context) if object == ffi.NULL then return nil end - return ffi.gc(object, xml2.xmlXPathFreeObject) + return object end jit.off(libxml2.xmlXPathEvalExpression) +function libxml2.xmlXPathFreeObject(object) + xml2.xmlXPathFreeObject(object) +end +jit.off(libxml2.xmlXPathFreeObject) + function libxml2.xmlXPathRegisterNs(context, prefix, namespace_uri) local status = xml2.xmlXPathRegisterNs(context, prefix, namespace_uri) return status == 0 diff --git a/xmlua/libxml2/tree.lua b/xmlua/libxml2/tree.lua index 1c82a775..b2a1491c 100644 --- a/xmlua/libxml2/tree.lua +++ b/xmlua/libxml2/tree.lua @@ -342,4 +342,5 @@ xmlChar *xmlGetNodePath(const xmlNode *node); void xmlFreeNode(xmlNodePtr cur); void xmlUnlinkNode(xmlNodePtr cur); void xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc); +xmlNodePtr xmlDocCopyNode(xmlNodePtr node, xmlDocPtr doc, int extended); ]] diff --git a/xmlua/node.lua b/xmlua/node.lua index 2df258cc..62007655 100644 --- a/xmlua/node.lua +++ b/xmlua/node.lua @@ -1,7 +1,6 @@ local Node = {} local libxml2 = require("xmlua.libxml2") -local ffi = require("ffi") function Node:replace(replace_node) if not self.node and not replace_node.node then @@ -36,6 +35,9 @@ function Node:path() end function Node:unlink() - return libxml2.xmlUnlinkNode(self.node) + libxml2.xmlUnlinkNode(self.node) + self.document.unlinked[self.node] = true + return self end + return Node diff --git a/xmlua/searchable.lua b/xmlua/searchable.lua index 0e24b871..922f253b 100644 --- a/xmlua/searchable.lua +++ b/xmlua/searchable.lua @@ -51,8 +51,9 @@ local ERROR_MESSAGES = { } function Searchable:search(xpath, namespaces) - local document = self.document - local context = libxml2.xmlXPathNewContext(document) + local document = self.document or self + local raw_document = document.raw_document + local context = libxml2.xmlXPathNewContext(raw_document) if not context then error("failed to create XPath context") end @@ -96,6 +97,7 @@ function Searchable:search(xpath, namespaces) if type == ffi.C.XPATH_NODESET then local found_node_set = object.nodesetval if found_node_set == ffi.NULL then + libxml2.xmlXPathFreeObject(object) return NodeSet.new({}) end local raw_node_set = {} @@ -120,7 +122,10 @@ function Searchable:search(xpath, namespaces) -- table.insert(raw_node_set, node) end end + libxml2.xmlXPathFreeObject(object) return NodeSet.new(raw_node_set) + else + libxml2.xmlXPathFreeObject(object) end end diff --git a/xmlua/serializable.lua b/xmlua/serializable.lua index bb7fd083..d498924b 100644 --- a/xmlua/serializable.lua +++ b/xmlua/serializable.lua @@ -17,7 +17,7 @@ local function save(target, flags, failure_message, options) if target.node then success = libxml2.xmlSaveTree(context, target.node) else - success = libxml2.xmlSaveDoc(context, target.document) + success = libxml2.xmlSaveDoc(context, target.raw_document) end libxml2.xmlSaveClose(context) if not success then diff --git a/xmlua/xml.lua b/xmlua/xml.lua index 3b238f08..1f48a743 100644 --- a/xmlua/xml.lua +++ b/xmlua/xml.lua @@ -23,7 +23,10 @@ function XML.parse(xml, options) error("Failed to parse XML: " .. ffi.string(context.lastError.message)) end end - return Document.new(document) + local doc = Document.new(document) + -- attach context to extend lifetime of dictionaries + doc.__context__ = context + return doc end return XML