From ebbdbc7b816369ae93820f1171644225d6da5f16 Mon Sep 17 00:00:00 2001 From: Aleksei Konovkin Date: Fri, 15 Dec 2023 13:19:07 +0300 Subject: [PATCH] memory fixes --- test/test-html-build.lua | 6 +-- xmlua/document.lua | 47 ++++++++++----------- xmlua/element.lua | 90 ++++++++++++++++++++-------------------- xmlua/libxml2.lua | 51 +++++++++++++++++++---- xmlua/libxml2/tree.lua | 1 + xmlua/node.lua | 4 +- xmlua/searchable.lua | 9 +++- xmlua/serializable.lua | 2 +- xmlua/xml.lua | 5 ++- 9 files changed, 132 insertions(+), 83 deletions(-) diff --git a/test/test-html-build.lua b/test/test-html-build.lua index ad888b2c..8c7818a7 100644 --- a/test/test-html-build.lua +++ b/test/test-html-build.lua @@ -26,7 +26,7 @@ function TestHTMLBuild.test_empty_root_dtd_systemid() local uri = "file:///usr/local/share/test.dtd" local document = HTML.build({"html"}, uri) luaunit.assertEquals({ - ffi.string(document.document.intSubset.SystemID), + ffi.string(document.raw_document.intSubset.SystemID), document:to_html() }, { @@ -43,8 +43,8 @@ function TestHTMLBuild.test_empty_root_dtd_publicid() local public_id = "-//W3C//DTD HTML 4.01//EN" local document = HTML.build({"html"}, uri, public_id) luaunit.assertEquals({ - ffi.string(document.document.intSubset.SystemID), - ffi.string(document.document.intSubset.ExternalID), + ffi.string(document.raw_document.intSubset.SystemID), + ffi.string(document.raw_document.intSubset.ExternalID), document:to_html() }, { diff --git a/xmlua/document.lua b/xmlua/document.lua index f3e43de6..016696d3 100644 --- a/xmlua/document.lua +++ b/xmlua/document.lua @@ -3,7 +3,6 @@ local Document = {} local libxml2 = require("xmlua.libxml2") local ffi = require("ffi") local converter = require("xmlua.converter") -local to_string = converter.to_string local Serializable = require("xmlua.serializable") local Searchable = require("xmlua.searchable") @@ -40,11 +39,11 @@ function metatable.__index(document, key) end function methods:root() - local root_element = libxml2.xmlDocGetRootElement(self.document) - if not root_element then + local node = libxml2.xmlDocGetRootElement(self.raw_document) + if not node then return nil end - return Element.new(self.document, root_element) + return Element.new(self, node) end function methods:parent() @@ -52,42 +51,42 @@ function methods:parent() end function methods:encoding() - return ffi.string(self.document.encoding) + return ffi.string(self.raw_document.encoding) end function methods:create_cdata_section(data) local raw_cdata_section_node = - libxml2.xmlNewCDataBlock(self.document, + libxml2.xmlNewCDataBlock(self.raw_document, data, data:len()) - return CDATASection.new(self.document, raw_cdata_section_node) + return CDATASection.new(self, raw_cdata_section_node) end function methods:create_comment(data) local raw_comment_node = libxml2.xmlNewComment(data) - return Comment.new(self.document, raw_comment_node) + return Comment.new(self, raw_comment_node) end function methods:create_document_fragment() local raw_document_fragment_node = - libxml2.xmlNewDocFragment(self.document) - return DocumentFragment.new(self.document, + libxml2.xmlNewDocFragment(self.raw_document) + return DocumentFragment.new(self, raw_document_fragment_node) end function methods:create_document_type(name, external_id, system_id) local raw_document_type = - libxml2.xmlCreateIntSubset(self.document, name, external_id, system_id) - return DocumentType.new(self.document, + libxml2.xmlCreateIntSubset(self.raw_document, name, external_id, system_id) + return DocumentType.new(self, raw_document_type) end function methods:get_internal_subset() local raw_document_type = - libxml2.xmlGetIntSubset(self.document) + libxml2.xmlGetIntSubset(self.raw_document) if raw_document_type ~= nil then - return DocumentType.new(self.document, + return DocumentType.new(self, raw_document_type) else return nil @@ -96,28 +95,28 @@ end function methods:add_entity_reference(name) local raw_entity_reference = - libxml2.xmlNewReference(self.document, name) - return EntityReference.new(self.document, + libxml2.xmlNewReference(self.raw_document, name) + return EntityReference.new(self, raw_entity_reference) end function methods:create_namespace(href, prefix) local raw_namespace = libxml2.xmlNewNs(self.node, href, prefix) - return Namespace.new(self.document, raw_namespace) + return Namespace.new(self, raw_namespace) end function methods:create_processing_instruction(name, content) local raw_processing_instruction = libxml2.xmlNewPI(name, content) - return ProcessingInstruction.new(self.document, + return ProcessingInstruction.new(self, raw_processing_instruction) end function methods:add_entity(entity_info) local entity_type_name = entity_info["entity_type"] local entity_type = converter.convert_entity_type_name(entity_type_name) - local raw_entity = libxml2.xmlAddDocEntity(self.document, + local raw_entity = libxml2.xmlAddDocEntity(self.raw_document, entity_info["name"], entity_type, entity_info["external_id"], @@ -127,14 +126,14 @@ function methods:add_entity(entity_info) end function methods:get_entity(name) - local raw_entity = libxml2.xmlGetDocEntity(self.document, name) + local raw_entity = libxml2.xmlGetDocEntity(self.raw_document, name) return converter.convert_xml_entity(raw_entity) end function methods:add_dtd_entity(entity_info) local entity_type_name = entity_info["entity_type"] local entity_type = converter.convert_entity_type_name(entity_type_name) - local raw_dtd_entity = libxml2.xmlAddDtdEntity(self.document, + local raw_dtd_entity = libxml2.xmlAddDtdEntity(self.raw_document, entity_info["name"], entity_type, entity_info["external_id"], @@ -144,7 +143,7 @@ function methods:add_dtd_entity(entity_info) end function methods:get_dtd_entity(name) - local raw_dtd_entity = libxml2.xmlGetDtdEntity(self.document, name) + local raw_dtd_entity = libxml2.xmlGetDtdEntity(self.raw_document, name) return converter.convert_xml_entity(raw_dtd_entity) end @@ -167,7 +166,7 @@ function Document.build(raw_document, tree) local root = Element.build(document, tree[1], tree[2]) if not libxml2.xmlDocSetRootElement(raw_document, root.node) then - root:unlink() + --root:unlink() return nil end @@ -187,7 +186,7 @@ function Document.new(raw_document, errors) errors = {} end local document = { - document = raw_document, + raw_document = raw_document, errors = errors, } setmetatable(document, metatable) diff --git a/xmlua/element.lua b/xmlua/element.lua index 8afebfac..0578252d 100644 --- a/xmlua/element.lua +++ b/xmlua/element.lua @@ -88,7 +88,7 @@ local function remove_namespace(node, prefix) end end - namespace = node.nsDef + local namespace = node.nsDef local namespace_previous = nil while namespace ~= ffi.NULL do if is_target_namespace(namespace) then @@ -127,65 +127,67 @@ local function set_attributes(element, attributes) end end -local function create_sub_element(document, node, name, attributes) +local function create_sub_element(document, parent, name, attributes) local namespace_prefix, local_name = parse_name(name) - local raw_element = libxml2.xmlNewNode(nil, local_name) - local element = Element.new(document, raw_element) + local node = libxml2.xmlNewNode(nil, local_name) + local element = Element.new(document, node) set_attributes(element, attributes) - local namespace = libxml2.xmlSearchNs(document, raw_element, namespace_prefix) - if not namespace and node then - namespace = libxml2.xmlSearchNs(document, node, namespace_prefix) + local namespace = libxml2.xmlSearchNs(document.raw_document, node, namespace_prefix) + if not namespace and parent then + namespace = libxml2.xmlSearchNs(document.raw_document, parent, namespace_prefix) end if namespace then - libxml2.xmlSetNs(raw_element, namespace) + libxml2.xmlSetNs(node, namespace) elseif namespace_prefix then element:unlink() - raw_element = libxml2.xmlNewNode(nil, name) - element = Element.new(document, raw_element) + node = libxml2.xmlNewNode(nil, name) + element = Element.new(document, node) set_attributes(element, attributes) end return element end -function methods:add_child(node) - if node.node.parent ~= ffi.NULL then - node:unlink() - end - local raw_added_node = - libxml2.xmlAddChild(self.node, node.node) - if raw_added_node ~= ffi.NULL then - ffi.gc(node.node, nil) +function methods:add_child(element) + local node + if self.document == element.document then + if element.node.parent ~= ffi.NULL then + element:unlink() + end + node = element.node + else + node = libxml2.xmlCopyNode(element.node, self.document.raw_document) end + libxml2.xmlAddChild(self.node, node) end -function methods:add_previous_sibling(node) - if not self.node and not node.node then +function methods:add_previous_sibling(element) + if not self.node and not element.node then error("Already freed receiver node and added node") elseif not self.node then error("Already freed receiver node") - elseif not node.node then + elseif not element.node then error("Already freed added node") end local raw_added_node, was_freed = - libxml2.xmlAddPrevSibling(self.node, node.node) + libxml2.xmlAddPrevSibling(self.node, element.node) if was_freed then - node.node = nil + element.node = nil end end -function methods:append_sibling(node) - if not self.node and not node.node then +function methods:append_sibling(element) + if not self.node and not element.node then error("Already freed receiver node and appended node") elseif not self.node then error("Already freed receiver node") - elseif not node.node then + elseif not element.node then error("Already freed appended node") end - local was_freed = libxml2.xmlAddSibling(self.node, node.node) + local was_freed = libxml2.xmlAddSibling(self.node, element.node) if was_freed then - node.node = nil + element.node = nil end end @@ -256,28 +258,28 @@ function methods:insert_element(position, name, attributes) end function methods:unlink() - local unlinked_node = Node.unlink(self) - return Element.new(nil, unlinked_node) + Node.unlink(self) + return self end function methods:get_attribute(name) local namespace_prefix, local_name = parse_name(name) if namespace_prefix == "xmlns" then - local namespace = libxml2.xmlSearchNs(self.document, self.node, local_name) + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, local_name) if namespace then return ffi.string(namespace.href) else return nil end elseif namespace_prefix == ffi.NULL and local_name == "xmlns" then - local namespace = libxml2.xmlSearchNs(self.document, self.node, nil) + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, nil) if namespace then return ffi.string(namespace.href) else return nil end elseif namespace_prefix then - local namespace = libxml2.xmlSearchNs(self.document, + local namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -298,7 +300,7 @@ function methods:set_attribute(name, value) local namespace_prefix, local_name = parse_name(name) local namespace if namespace_prefix == "xmlns" then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, local_name) if namespace then @@ -308,7 +310,7 @@ function methods:set_attribute(name, value) libxml2.xmlNewNs(self.node, value, local_name) end elseif namespace_prefix == nil and local_name == "xmlns" then - namespace = libxml2.xmlSearchNs(self.document, self.node, nil) + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, nil) if namespace then libxml2.xmlFree(ffi.cast("void *", namespace.href)) namespace.href = libxml2.xmlStrdup(value) @@ -317,7 +319,7 @@ function methods:set_attribute(name, value) set_default_namespace(self.node, namespace) end elseif namespace_prefix then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -341,7 +343,7 @@ function methods:remove_attribute(name) elseif namespace_prefix == nil and local_name == "xmlns" then remove_namespace(self.node, nil) elseif namespace_prefix then - namespace = libxml2.xmlSearchNs(self.document, + namespace = libxml2.xmlSearchNs(self.document.raw_document, self.node, namespace_prefix) if namespace then @@ -378,12 +380,12 @@ function methods:next() end function methods:root() - return Document.new(self.document):root() + return self.document:root() end function methods:parent() if tonumber(self.node.parent.type) == ffi.C.XML_DOCUMENT_NODE then - return Document.new(self.document) + return self.document else return Element.new(self.document, self.node.parent) end @@ -404,7 +406,7 @@ function methods:text() end function methods:namespaces() - local raw_namespaces = libxml2.xmlGetNsList(self.document, self.node) + local raw_namespaces = libxml2.xmlGetNsList(self.document.raw_document, self.node) if not raw_namespaces then return nil end @@ -427,23 +429,23 @@ function methods:find_namespace(prefix, href) local raw_namespace if not prefix and href then raw_namespace = - libxml2.xmlSearchNsByHref(self.document, self.node, href) + libxml2.xmlSearchNsByHref(self.document.raw_document, self.node, href) else raw_namespace = - libxml2.xmlSearchNs(self.document, self.node, prefix) + libxml2.xmlSearchNs(self.document.raw_document, self.node, prefix) end return Namespace.new(self.document, raw_namespace) end -- For internal use function Element.build(document, name, attributes) - return create_sub_element(document.node, nil, name, attributes) + return create_sub_element(document, nil, name, attributes) end function Element.new(document, node) local element = { document = document, - node = node, + node = node } setmetatable(element, metatable) return element diff --git a/xmlua/libxml2.lua b/xmlua/libxml2.lua index 366b5bda..760638a8 100644 --- a/xmlua/libxml2.lua +++ b/xmlua/libxml2.lua @@ -16,6 +16,10 @@ require("xmlua.libxml2.xmlsave") require("xmlua.libxml2.xpath") require("xmlua.libxml2.entities") +local function print_dbg(...) + --print(...) +end + local ffi = require("ffi") local loaded, xml2 = pcall(ffi.load, "xml2") if not loaded then @@ -116,7 +120,11 @@ function libxml2.htmlCtxtReadMemory(context, html, options) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("htmlCtxtReadMemory: ", document) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p) + xml2.xmlFreeNode(p) + end) end jit.off(libxml2.htmlCtxtReadMemory) @@ -133,7 +141,11 @@ function libxml2.xmlNewParserCtxt() if context == ffi.NULL then return nil end - return ffi.gc(context, xml2.xmlFreeParserCtxt) + print_dbg("xmlNewParserCtxt: ", context) + return ffi.gc(context, function(p) + print_dbg("xmlFreeParserCtxt: ", p) + xml2.xmlFreeParserCtxt(p) + end) end function libxml2.xmlCreatePushParserCtxt(filename) @@ -197,7 +209,12 @@ function libxml2.xmlCtxtReadMemory(context, xml, options) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("xmlCtxtReadMemory: ", document, ", ctx=", context) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p, ", ctx=", context) + xml2.xmlFreeDoc(p) + end) + end jit.off(libxml2.xmlCtxtReadMemory) @@ -289,7 +306,11 @@ function libxml2.xmlNewDoc(xml_version) if document == ffi.NULL then return nil end - return ffi.gc(document, libxml2.xmlFreeDoc) + print_dbg("xmlNewDoc: ", document) + return ffi.gc(document, function(p) + print_dbg("xmlFreeDoc: ", p) + xml2.xmlFreeDoc(p) + end) end function libxml2.xmlDocSetRootElement(document, root) @@ -572,10 +593,21 @@ end function libxml2.xmlUnlinkNode(node) xml2.xmlUnlinkNode(node) - xml2.xmlSetTreeDoc(node, ffi.NULL) - return ffi.gc(node, xml2.xmlFreeNode) + print_dbg("xmlUnlinkNode: ", node, ", doc=", node.doc) + return ffi.gc(node, function(p) + print_dbg("xmlFreeNode: ", p, ", doc=", p.doc) + xml2.xmlFreeNode(p) + end) +end + +function libxml2.xmlCopyNode(node, doc) + return xml2.xmlDocCopyNode(node, doc, 2) end +function libxml2.xmlFreeNode(node) + print_dbg("xmlFreeNode: ", node, ", doc=", node.doc) + xml2.xmlFreeNode(node) +end function libxml2.xmlBufferCreate() return ffi.gc(xml2.xmlBufferCreate(), xml2.xmlBufferFree) @@ -643,10 +675,15 @@ function libxml2.xmlXPathEvalExpression(expression, context) if object == ffi.NULL then return nil end - return ffi.gc(object, xml2.xmlXPathFreeObject) + return object end jit.off(libxml2.xmlXPathEvalExpression) +function libxml2.xmlXPathFreeObject(object) + xml2.xmlXPathFreeObject(object) +end +jit.off(libxml2.xmlXPathFreeObject) + function libxml2.xmlXPathRegisterNs(context, prefix, namespace_uri) local status = xml2.xmlXPathRegisterNs(context, prefix, namespace_uri) return status == 0 diff --git a/xmlua/libxml2/tree.lua b/xmlua/libxml2/tree.lua index 1c82a775..b2a1491c 100644 --- a/xmlua/libxml2/tree.lua +++ b/xmlua/libxml2/tree.lua @@ -342,4 +342,5 @@ xmlChar *xmlGetNodePath(const xmlNode *node); void xmlFreeNode(xmlNodePtr cur); void xmlUnlinkNode(xmlNodePtr cur); void xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc); +xmlNodePtr xmlDocCopyNode(xmlNodePtr node, xmlDocPtr doc, int extended); ]] diff --git a/xmlua/node.lua b/xmlua/node.lua index 2df258cc..68bf24d3 100644 --- a/xmlua/node.lua +++ b/xmlua/node.lua @@ -36,6 +36,8 @@ function Node:path() end function Node:unlink() - return libxml2.xmlUnlinkNode(self.node) + libxml2.xmlUnlinkNode(self.node) + return self end + return Node diff --git a/xmlua/searchable.lua b/xmlua/searchable.lua index 0e24b871..922f253b 100644 --- a/xmlua/searchable.lua +++ b/xmlua/searchable.lua @@ -51,8 +51,9 @@ local ERROR_MESSAGES = { } function Searchable:search(xpath, namespaces) - local document = self.document - local context = libxml2.xmlXPathNewContext(document) + local document = self.document or self + local raw_document = document.raw_document + local context = libxml2.xmlXPathNewContext(raw_document) if not context then error("failed to create XPath context") end @@ -96,6 +97,7 @@ function Searchable:search(xpath, namespaces) if type == ffi.C.XPATH_NODESET then local found_node_set = object.nodesetval if found_node_set == ffi.NULL then + libxml2.xmlXPathFreeObject(object) return NodeSet.new({}) end local raw_node_set = {} @@ -120,7 +122,10 @@ function Searchable:search(xpath, namespaces) -- table.insert(raw_node_set, node) end end + libxml2.xmlXPathFreeObject(object) return NodeSet.new(raw_node_set) + else + libxml2.xmlXPathFreeObject(object) end end diff --git a/xmlua/serializable.lua b/xmlua/serializable.lua index bb7fd083..d498924b 100644 --- a/xmlua/serializable.lua +++ b/xmlua/serializable.lua @@ -17,7 +17,7 @@ local function save(target, flags, failure_message, options) if target.node then success = libxml2.xmlSaveTree(context, target.node) else - success = libxml2.xmlSaveDoc(context, target.document) + success = libxml2.xmlSaveDoc(context, target.raw_document) end libxml2.xmlSaveClose(context) if not success then diff --git a/xmlua/xml.lua b/xmlua/xml.lua index 3b238f08..1f48a743 100644 --- a/xmlua/xml.lua +++ b/xmlua/xml.lua @@ -23,7 +23,10 @@ function XML.parse(xml, options) error("Failed to parse XML: " .. ffi.string(context.lastError.message)) end end - return Document.new(document) + local doc = Document.new(document) + -- attach context to extend lifetime of dictionaries + doc.__context__ = context + return doc end return XML