Skip to content

Commit

Permalink
Convert SDT checkboxes to checkbox inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
mwilliamson committed Dec 30, 2024
1 parent 6448068 commit 7c58744
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 2 deletions.
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.9.0

* Detect checkboxes, both as complex fields and structured document tags, and
convert them to checkbox inputs.

# 1.8.0

* Add style mapping for highlights.
Expand Down
17 changes: 15 additions & 2 deletions mammoth/docx/body_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def read_boolean_element(element):
if element is None:
return False
else:
return element.attributes.get("w:val") not in ["false", "0"]
return read_boolean_attribute_value(element.attributes.get("w:val"))

def read_boolean_attribute_value(value):
    """Interpret the raw string value of an on/off attribute as a boolean.

    The value is false only when it is one of the explicit "off" spellings.
    Every other value, including ``None`` (attribute not present), is true.
    """
    # "off" is a valid false spelling of the OOXML on/off type alongside
    # "false" and "0"; without it, val="off" would be read as true.
    return value not in ["false", "0", "off"]

def read_underline_element(element):
    """Decide whether an underline element switches underlining on.

    A falsy element (such as ``None``) is returned unchanged. Otherwise the
    result is ``True`` unless the element's ``w:val`` attribute is missing or
    is one of "false", "0" or "none".
    """
    if not element:
        # Hand the falsy element straight back rather than coercing to bool.
        return element

    underline_value = element.attributes.get("w:val")
    values_meaning_no_underline = (None, "false", "0", "none")
    return underline_value not in values_meaning_no_underline
Expand Down Expand Up @@ -569,7 +572,17 @@ def alternate_content(element):
return read_child_elements(element.find_child("mc:Fallback"))

def read_sdt(element):
    """Read a structured document tag (``w:sdt``) element.

    If the tag's properties (``w:sdtPr``) contain a ``wordml:checkbox``
    element, the tag is read as a single checkbox. The checkbox is checked
    only when a ``wordml:checked`` child is present and its ``wordml:val``
    attribute reads as true. Otherwise the children of ``w:sdtContent`` are
    read as normal.
    """
    checkbox = element.find_child_or_null("w:sdtPr").find_child("wordml:checkbox")

    if checkbox is not None:
        checked_element = checkbox.find_child("wordml:checked")
        # A checkbox without a wordml:checked child is treated as unchecked.
        is_checked = (
            checked_element is not None and
            read_boolean_attribute_value(checked_element.attributes.get("wordml:val"))
        )
        return _success(documents.checkbox(checked=is_checked))
    else:
        return read_child_elements(element.find_child_or_null("w:sdtContent"))

handlers = {
"w:t": text,
Expand Down
4 changes: 4 additions & 0 deletions mammoth/docx/office_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
("mc", "http://schemas.openxmlformats.org/markup-compatibility/2006"),
("v", "urn:schemas-microsoft-com:vml"),
("office-word", "urn:schemas-microsoft-com:office:word"),

# [MS-DOCX]: Word Extensions to the Office Open XML (.docx) File Format
# https://learn.microsoft.com/en-us/openspecs/office_standards/ms-docx/b839fe1f-e1ca-4fa6-8c26-5954d0abbccd
("wordml", "http://schemas.microsoft.com/office/word/2010/wordml"),
]


Expand Down
37 changes: 37 additions & 0 deletions tests/docx/body_xml_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,43 @@ def test_complex_field_checkbox_with_default_0_and_checked_1_is_checked(self):
is_run(children=is_sequence(is_checkbox(checked=True))),
)))

def test_structured_document_tag_checkbox_without_checked_is_not_checked(self):
    # The checkbox element has no wordml:checked child at all.
    checkbox = xml_element("wordml:checkbox")
    properties = xml_element("w:sdtPr", {}, [checkbox])
    sdt = xml_element("w:sdt", {}, [properties])

    document_element = _read_and_get_document_xml_element(sdt)

    assert_that(document_element, is_checkbox(checked=False))

def test_structured_document_tag_checkbox_with_checked_0_is_not_checked(self):
    # The wordml:checked child carries wordml:val="0".
    checked = xml_element("wordml:checked", {"wordml:val": "0"})
    checkbox = xml_element("wordml:checkbox", {}, [checked])
    properties = xml_element("w:sdtPr", {}, [checkbox])
    sdt = xml_element("w:sdt", {}, [properties])

    document_element = _read_and_get_document_xml_element(sdt)

    assert_that(document_element, is_checkbox(checked=False))

def test_structured_document_tag_checkbox_with_checked_1_is_checked(self):
    # The wordml:checked child carries wordml:val="1".
    checked = xml_element("wordml:checked", {"wordml:val": "1"})
    checkbox = xml_element("wordml:checkbox", {}, [checked])
    properties = xml_element("w:sdtPr", {}, [checkbox])
    sdt = xml_element("w:sdt", {}, [properties])

    document_element = _read_and_get_document_xml_element(sdt)

    assert_that(document_element, is_checkbox(checked=True))

def _complex_field_checkbox_paragraph(self, ff_data_children):
return xml_element("w:p", {}, [
xml_element("w:r", {}, [
Expand Down

0 comments on commit 7c58744

Please sign in to comment.