From 237d861c147fd6027b68c910425ebb361dd7b0e2 Mon Sep 17 00:00:00 2001 From: Michael Williamson Date: Sat, 30 Nov 2024 14:03:35 +0000 Subject: [PATCH] Read whether complex field checkbox is checked --- mammoth/docx/body_xml.py | 17 ++++++- tests/docx/body_xml_tests.py | 93 ++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 2 deletions(-) diff --git a/mammoth/docx/body_xml.py b/mammoth/docx/body_xml.py index 5e891fd5..134917fb 100644 --- a/mammoth/docx/body_xml.py +++ b/mammoth/docx/body_xml.py @@ -134,7 +134,10 @@ def _read_run_style(properties): return _read_style(properties, "w:rStyle", "Run", styles.find_character_style_by_id) def read_boolean_element(element): - return element and element.attributes.get("w:val") not in ["false", "0"] + if element is None: + return False + else: + return element.attributes.get("w:val") not in ["false", "0"] def read_underline_element(element): return element and element.attributes.get("w:val") not in [None, "false", "0", "none"] @@ -231,7 +234,17 @@ def parse_instr_text(instr_text, *, fld_char): checkbox_result = re.match(r'\s*FORMCHECKBOX\s*', instr_text) if checkbox_result is not None: - return complex_fields.checkbox(checked=False) + checkbox_element = fld_char \ + .find_child_or_null("w:ffData") \ + .find_child_or_null("w:checkBox") + checked_element = checkbox_element.find_child("w:checked") + + if checked_element is None: + checked = read_boolean_element(checkbox_element.find_child("w:default")) + else: + checked = read_boolean_element(checked_element) + + return complex_fields.checkbox(checked=checked) return None diff --git a/tests/docx/body_xml_tests.py b/tests/docx/body_xml_tests.py index cb5a9b5b..8c1fdf08 100644 --- a/tests/docx/body_xml_tests.py +++ b/tests/docx/body_xml_tests.py @@ -676,6 +676,99 @@ def test_complex_field_checkbox_with_separate_is_read(self): is_run(children=is_sequence(is_checkbox())), ))) + def test_complex_field_checkbox_without_default_nor_checked_is_unchecked(self): + element = self._complex_field_checkbox_paragraph([ + xml_element("w:checkBox"), + ]) + + paragraph = _read_and_get_document_xml_element(element); + + assert_that(paragraph, is_paragraph(children=is_sequence( + is_empty_run, + is_empty_run, + is_run(children=is_sequence(is_checkbox(checked=False))), + ))) + + def test_complex_field_checkbox_with_default_0_and_without_checked_is_unchecked(self): + element = self._complex_field_checkbox_paragraph([ + xml_element("w:checkBox", {}, [ + xml_element("w:default", {"w:val": "0"}), + ]), + ]) + + paragraph = _read_and_get_document_xml_element(element); + + assert_that(paragraph, is_paragraph(children=is_sequence( + is_empty_run, + is_empty_run, + is_run(children=is_sequence(is_checkbox(checked=False))), + ))) + + def test_complex_field_checkbox_with_default_1_and_without_checked_is_checked(self): + element = self._complex_field_checkbox_paragraph([ + xml_element("w:checkBox", {}, [ + xml_element("w:default", {"w:val": "1"}), + ]), + ]) + + paragraph = _read_and_get_document_xml_element(element); + + assert_that(paragraph, is_paragraph(children=is_sequence( + is_empty_run, + is_empty_run, + is_run(children=is_sequence(is_checkbox(checked=True))), + ))) + + def test_complex_field_checkbox_with_default_1_and_checked_0_is_unchecked(self): + element = self._complex_field_checkbox_paragraph([ + xml_element("w:checkBox", {}, [ + xml_element("w:default", {"w:val": "1"}), + xml_element("w:checked", {"w:val": "0"}), + ]), + ]) + + paragraph = _read_and_get_document_xml_element(element); + + assert_that(paragraph, is_paragraph(children=is_sequence( + is_empty_run, + is_empty_run, + is_run(children=is_sequence(is_checkbox(checked=False))), + ))) + + def test_complex_field_checkbox_with_default_0_and_checked_1_is_checked(self): + element = self._complex_field_checkbox_paragraph([ + xml_element("w:checkBox", {}, [ + xml_element("w:default", {"w:val": "0"}), + xml_element("w:checked", {"w:val": "1"}), + ]), + ]) + + paragraph = _read_and_get_document_xml_element(element); + + assert_that(paragraph, is_paragraph(children=is_sequence( + is_empty_run, + is_empty_run, + is_run(children=is_sequence(is_checkbox(checked=True))), + ))) + + def _complex_field_checkbox_paragraph(self, ff_data_children): + return xml_element("w:p", {}, [ + xml_element("w:r", {}, [ + xml_element("w:fldChar", {"w:fldCharType": "begin"}, [ + xml_element("w:ffData", {}, ff_data_children) + ]), + ]), + xml_element("w:instrText", {}, [ + xml_text(' FORMCHECKBOX ') + ]), + xml_element("w:r", {}, [ + xml_element("w:fldChar", {"w:fldCharType": "separate"}) + ]), + xml_element("w:r", {}, [ + xml_element("w:fldChar", {"w:fldCharType": "end"}) + ]), + ]) + def test_can_read_tab_element(): element = xml_element("w:tab")