Skip to content

Commit

Permalink
Read complex field checkboxes, always unchecked
Browse files Browse the repository at this point in the history
  • Loading branch information
mwilliamson committed Nov 30, 2024
1 parent fe7708b commit fe6fb7e
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 16 deletions.
42 changes: 29 additions & 13 deletions mammoth/docx/body_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .. import lists
from . import complex_fields
from .dingbats import dingbats
from .xmlparser import node_types, XmlElement
from .xmlparser import node_types, XmlElement, null_xml_element
from .styles_xml import Styles
from .uris import replace_fragment, uri_to_zip_entry_name

Expand Down Expand Up @@ -192,29 +192,45 @@ def current_hyperlink_kwargs():
def read_fld_char(element):
fld_char_type = element.attributes.get("w:fldCharType")
if fld_char_type == "begin":
complex_field_stack.append(complex_fields.unknown)
complex_field_stack.append(complex_fields.begin(fld_char=element))
del current_instr_text[:]

elif fld_char_type == "end":
complex_field_stack.pop()
complex_field = complex_field_stack.pop()
if isinstance(complex_field, complex_fields.Begin):
complex_field = parse_current_instr_text(complex_field)

if isinstance(complex_field, complex_fields.Checkbox):
return _success(documents.checkbox(checked=complex_field.checked))

elif fld_char_type == "separate":
instr_text = "".join(current_instr_text)
hyperlink_kwargs = parse_hyperlink_field_code(instr_text)
if hyperlink_kwargs is None:
complex_field = complex_fields.unknown
else:
complex_field = complex_fields.hyperlink(hyperlink_kwargs)
complex_field_stack.pop()
complex_field_separate = complex_field_stack.pop()
complex_field = parse_current_instr_text(complex_field_separate)
complex_field_stack.append(complex_field)
return _empty_result

def parse_hyperlink_field_code(instr_text):
def parse_current_instr_text(complex_field):
    """Parse the instruction text accumulated for the current complex field.

    ``complex_field`` is the entry popped from the complex field stack. When
    it is still a ``Begin`` marker, the fldChar element stored on it is
    forwarded to ``parse_instr_text``; for any other entry the null XML
    element is forwarded instead.

    Returns whatever ``parse_instr_text`` returns for the joined text.
    """
    # Only Begin markers carry the originating fldChar element.
    fld_char = (
        complex_field.fld_char
        if isinstance(complex_field, complex_fields.Begin)
        else null_xml_element
    )

    # The instruction text is collected piecewise, so join it before parsing.
    return parse_instr_text("".join(current_instr_text), fld_char=fld_char)

def parse_instr_text(instr_text, *, fld_char):
external_link_result = re.match(r'\s*HYPERLINK "(.*)"', instr_text)
if external_link_result is not None:
return dict(href=external_link_result.group(1))
return complex_fields.hyperlink(dict(href=external_link_result.group(1)))

internal_link_result = re.match(r'\s*HYPERLINK\s+\\l\s+"(.*)"', instr_text)
if internal_link_result is not None:
return dict(anchor=internal_link_result.group(1))
return complex_fields.hyperlink(dict(anchor=internal_link_result.group(1)))

checkbox_result = re.match(r'\s*FORMCHECKBOX\s*', instr_text)
if checkbox_result is not None:
return complex_fields.checkbox(checked=False)

return None

Expand Down
18 changes: 18 additions & 0 deletions mammoth/docx/complex_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,28 @@ class unknown(object):
pass


class Begin:
    """Marker for a complex field that has been opened but not yet parsed.

    Attributes:
        fld_char: The value supplied by the caller when the field was opened
            (the body reader passes the ``w:fldChar`` element whose type is
            ``begin``). It is stored unchanged.
    """

    def __init__(self, *, fld_char):
        self.fld_char = fld_char

    def __repr__(self):
        # A readable repr makes failing assertions and debug output useful
        # instead of showing an anonymous "<Begin object at 0x...>".
        return "{}(fld_char={!r})".format(type(self).__name__, self.fld_char)


def begin(*, fld_char):
    """Return a new ``Begin`` marker holding ``fld_char``."""
    return Begin(fld_char=fld_char)


class Hyperlink(object):
    """Complex field that was recognised as a hyperlink.

    Attributes:
        kwargs: The mapping supplied by the caller, stored unchanged. The
            body reader passes a dict with either an ``href`` or an
            ``anchor`` key.
    """

    def __init__(self, kwargs):
        self.kwargs = kwargs

    def __repr__(self):
        # A readable repr makes failing assertions and debug output useful
        # instead of showing an anonymous "<Hyperlink object at 0x...>".
        return "{}(kwargs={!r})".format(type(self).__name__, self.kwargs)


def hyperlink(kwargs):
    """Return a new ``Hyperlink`` field holding ``kwargs``."""
    return Hyperlink(kwargs=kwargs)


class Checkbox:
    """Complex field that was recognised as a form checkbox.

    Attributes:
        checked: The checked state supplied by the caller, stored unchanged.
    """

    def __init__(self, *, checked):
        self.checked = checked

    def __repr__(self):
        # A readable repr makes failing assertions and debug output useful
        # instead of showing an anonymous "<Checkbox object at 0x...>".
        return "{}(checked={!r})".format(type(self).__name__, self.checked)


def checkbox(*, checked):
    """Return a new ``Checkbox`` field with the given ``checked`` state."""
    return Checkbox(checked=checked)
4 changes: 2 additions & 2 deletions mammoth/docx/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class XmlElement(object):
children = cobble.field()

def find_child_or_null(self, name):
return self.find_child(name) or _null_xml_element
return self.find_child(name) or null_xml_element

def find_child(self, name):
for child in self.children:
Expand Down Expand Up @@ -50,7 +50,7 @@ def find_child(self, name):
return None


_null_xml_element = NullXmlElement()
null_xml_element = NullXmlElement()


@cobble.data
Expand Down
47 changes: 47 additions & 0 deletions tests/docx/body_xml_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
is_empty_run,
is_run,
is_hyperlink,
is_checkbox,
is_text,
is_table,
is_row,
Expand Down Expand Up @@ -630,6 +631,52 @@ def test_field_without_separate_fld_char_is_ignored(self):
)))


class CheckboxTests:
    # Tests for reading FORMCHECKBOX complex fields as checkbox elements.

    def test_complex_field_checkbox_without_separate_is_read(self):
        # The field goes straight from "begin" to "end" with no "separate"
        # fldChar, so the instruction text is only parsed when "end" is read.
        element = xml_element("w:p", {}, [
            xml_element("w:r", {}, [
                xml_element("w:fldChar", {"w:fldCharType": "begin"})
            ]),
            xml_element("w:instrText", {}, [
                xml_text(' FORMCHECKBOX ')
            ]),
            xml_element("w:r", {}, [
                xml_element("w:fldChar", {"w:fldCharType": "end"})
            ])
        ])

        paragraph = _read_and_get_document_xml_element(element)

        # The "begin" run is empty; the checkbox appears in the "end" run.
        assert_that(paragraph, is_paragraph(children=is_sequence(
            is_empty_run,
            is_run(children=is_sequence(is_checkbox())),
        )))

    def test_complex_field_checkbox_with_separate_is_read(self):
        # Same field, but with a "separate" fldChar between "begin" and "end".
        element = xml_element("w:p", {}, [
            xml_element("w:r", {}, [
                xml_element("w:fldChar", {"w:fldCharType": "begin"})
            ]),
            xml_element("w:instrText", {}, [
                xml_text(' FORMCHECKBOX ')
            ]),
            xml_element("w:r", {}, [
                xml_element("w:fldChar", {"w:fldCharType": "separate"})
            ]),
            xml_element("w:r", {}, [
                xml_element("w:fldChar", {"w:fldCharType": "end"})
            ])
        ])

        paragraph = _read_and_get_document_xml_element(element)

        # The "begin" and "separate" runs are empty; the checkbox appears in
        # the "end" run.
        assert_that(paragraph, is_paragraph(children=is_sequence(
            is_empty_run,
            is_empty_run,
            is_run(children=is_sequence(is_checkbox())),
        )))


def test_can_read_tab_element():
element = xml_element("w:tab")
tab = _read_and_get_document_xml_element(element)
Expand Down
3 changes: 2 additions & 1 deletion tests/docx/document_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ def matcher(**kwargs):
instance_of(element_type),
has_attrs(**kwargs),
)

return matcher


is_paragraph = create_element_matcher(documents.Paragraph)
is_run = create_element_matcher(documents.Run)
is_hyperlink = create_element_matcher(documents.Hyperlink)
is_checkbox = create_element_matcher(documents.Checkbox)
is_table = create_element_matcher(documents.Table)
is_row = create_element_matcher(documents.TableRow)

Expand Down

0 comments on commit fe6fb7e

Please sign in to comment.