forked from phuel/tables_extended
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtables_extended.py
385 lines (328 loc) · 13.4 KB
/
tables_extended.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
"""
Extended Tables Extension for Python-Markdown
=============================================
Allows creating tables with colspans, rowspans and an optional
header in Python-Markdown.
Copyright 2021 phuel
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
based on:
Tables Extension for Python-Markdown
====================================
Added parsing of tables to Python-Markdown.
See <https://Python-Markdown.github.io/extensions/tables>
for documentation.
Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
All changes Copyright 2008-2014 The Python Markdown Project
License: [BSD](https://opensource.org/licenses/bsd-license.php)
And
Table Cell and Row Span extension for Python Markdown
=====================================================
Adds spanning for rows and cells in tables.
Author: Neepawa
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
"""
from markdown import Extension
from markdown.blockprocessors import BlockProcessor
import xml.etree.ElementTree as etree
import re
# Bit flags recording which outer border pipes the first row of a table has.
# TableProcessor.test() ORs them into `self.border`.
PIPE_NONE = 0  # neither a leading nor a trailing border pipe
PIPE_LEFT = 1  # the row starts with '|'
PIPE_RIGHT = 2  # the row ends with an unescaped '|'
class Cell:
    """A single parsed table cell together with its span and alignment data.

    Attributes:
        text: The cell content with surrounding spaces removed, or ``None``
            when the raw text was the empty string (two adjacent pipes).
            A cell containing only spaces yields ``''`` rather than ``None``;
            the span-merging code relies on this distinction.
        align: Horizontal alignment taken from the separator row
            (``'left'``, ``'right'``, ``'center'``) or ``None``.
        colspan: Number of columns covered by the cell (starts at 1).
        rowspan: Number of rows covered by the cell (starts at 1).
        valign: Vertical alignment (``'top'``, ``'middle'``, ``'bottom'``)
            or ``None`` when unset.
    """

    def __init__(self, text, align):
        """Create a cell from the raw *text* between two pipes.

        Args:
            text: Raw cell text exactly as split from the row.
            align: Horizontal alignment for the cell's column, or ``None``.
        """
        # Only a completely empty raw string marks a collapsible cell.
        self.text = text.strip(' ') if text != '' else None
        self.align = align
        self.colspan = 1
        self.rowspan = 1
        self.valign = None

    def __repr__(self):
        """Return a debugging representation listing all attributes."""
        return (
            f"{type(self).__name__}(text={self.text!r}, align={self.align!r}, "
            f"colspan={self.colspan!r}, rowspan={self.rowspan!r}, "
            f"valign={self.valign!r})"
        )
class TableProcessor(BlockProcessor):
    """Block processor that converts pipe-delimited text blocks into tables.

    A block is accepted when its first or second row is a separator row
    (made only of ``|``, ``:``, ``-`` and spaces).  If the separator is the
    second row, the first row becomes the table header; if the separator is
    the first row, the table has no header.  Completely empty cells are
    merged into column spans, and cells followed downwards by empty cells
    and a row span marker (e.g. ``_ _``, ``_^_``) are merged into row spans.

    ``test()`` stores its findings (``border``, ``separator``,
    ``separator_row``) on the instance so that ``run()`` does not have to
    repeat the work.
    """

    # Characters a separator row may consist of.
    SEPARATOR_CHARS_SET = set('|:- ')
    # Tokens relevant for splitting: \\, \`+, `+, \| and |.
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # An unescaped pipe at the very end of a row.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
    # Cell text that terminates a row span, optionally with a valign code.
    RE_row_span_marker = re.compile(r'^_[_^=\- ]*_$')
    RE_valign_top = re.compile(r'.*\^')
    RE_valign_bottom = re.compile(r'.*=')
    RE_valign_middle = re.compile(r'.*-')

    def __init__(self, parser):
        """Initialise the per-block state shared between test() and run()."""
        self.border = False
        self.separator = ''
        # Initialised here so run() never hits an undefined attribute.
        self.separator_row = -1
        super().__init__(parser)

    def test(self, parent, block):
        """Return True when *block* looks like a table.

        Ensures that the separator row and the first text row are valid
        table rows.  The border flags, the separator row index and the
        split separator row are kept on the instance to avoid repeating the
        work in run().
        """
        rows = [row.strip(' ') for row in block.split('\n')]
        if len(rows) < 2:
            return False

        # The separator row can either be the first or the second row.
        # If the separator row is the first row, no table header is generated.
        self.separator_row = self._get_separator_row(rows)
        if self.separator_row < 0:
            return False

        # Use the first row (separator or text) to determine the border.
        # This has to happen before any call to _split_row(), which reads
        # self.border.
        row0 = rows[0]
        self.border = PIPE_NONE
        if row0.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(row0) is not None:
            self.border |= PIPE_RIGHT

        # Use the first text row to determine the column count.
        if self.separator_row == 0:
            row0 = rows[1]
        row0_len = len(self._split_row(row0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith('|')
                or self.RE_END_BORDER.search(row) is not None
                for row in rows[1:]
            )

        if is_table:
            row = self._split_row(rows[self.separator_row])
            is_table = (
                len(row) == row0_len
                and set(''.join(row)) <= self.SEPARATOR_CHARS_SET
            )
            if is_table:
                self.separator = row
        return is_table

    def run(self, parent, blocks):
        """Parse the first block of *blocks* and append a table to *parent*.

        Relies on the state stored by the preceding test() call.
        """
        block = blocks.pop(0).split('\n')
        if self.separator_row > 0:
            header = block[0]
            rows = block[2:]
        else:
            header = None
            rows = block[1:]

        # Get alignment of columns
        align = []
        for c in self.separator:
            c = c.strip(' ')
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        if header is not None:
            thead = etree.SubElement(table, 'thead')
            cells = self._parse_row(header, align)
            self._merge_spans([cells])
            self._build_row(cells, thead)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            cells = [self._parse_row(row, align) for row in rows]
            self._merge_spans(cells)
            for row in cells:
                self._build_row(row, tbody)

    def _get_separator_row(self, rows):
        """Search the first two rows for the separator row.

        Returns the row index (0 or 1), or -1 if neither row consists
        solely of separator characters.
        """
        for index, row in enumerate(rows[:2]):
            if set(row) <= self.SEPARATOR_CHARS_SET:
                return index
        return -1

    def _build_empty_row(self, parent, align):
        """Build a row of empty ``td`` cells, one per column in *align*."""
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, cells, parent):
        """Given a row of parsed cells, build the table cell elements.

        Entries that are ``None`` were absorbed by a span and are skipped.
        Cells are emitted as ``th`` inside a ``thead`` and ``td`` otherwise.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        for cell in cells:
            if cell is None:
                continue
            c = etree.SubElement(tr, tag)
            c.text = cell.text
            if cell.align is not None:
                c.set('align', cell.align)
            if cell.colspan > 1:
                c.set('colspan', str(cell.colspan))
            if cell.rowspan > 1:
                c.set('rowspan', str(cell.rowspan))
            if cell.valign is not None:
                c.set('valign', cell.valign)

    def _merge_spans(self, rows):
        """Transform adjacent empty cells into colspans or rowspans.

        *rows* is a list of cell lists and is modified in place.
        """
        for row_index, row in enumerate(rows):
            for col_index in range(len(row)):
                self._merge_spans_for_cell(rows, row_index, col_index)

    def _valign_from_marker(self, text):
        """Return the vertical alignment encoded in a row span marker.

        Returns ``'top'``, ``'bottom'``, ``'middle'`` or ``None``.

        Raises:
            ValueError: If the marker contains more than one alignment code.
        """
        codes = (
            (self.RE_valign_top, 'top'),
            (self.RE_valign_bottom, 'bottom'),
            (self.RE_valign_middle, 'middle'),
        )
        found = [name for pattern, name in codes if pattern.match(text)]
        if len(found) > 1:
            raise ValueError('Can only use one of ^ (top), - (middle) or = (bottom) codes in one row span marker.')
        return found[0] if found else None

    def _merge_spans_for_cell(self, rows, row_index, col_index):
        """Starting from one cell, collapse adjacent empty cells into spans.

        Cells absorbed by the span are replaced with ``None`` in *rows*.

        Raises:
            ValueError: If a row span marker carries more than one vertical
                alignment code.
        """
        nrows = len(rows)
        row = rows[row_index]
        ncols = len(row)
        cell = row[col_index]
        # If the cell is already collapsed nothing needs to be done anymore.
        if cell is None:
            return

        # Check for empty cells to collapse right of the current cell.
        right = col_index + 1
        while right < ncols:
            neighbour = row[right]
            if neighbour is None or neighbour.text is not None:
                break
            cell.colspan += 1
            right += 1

        # Check for empty cells to collapse below the current cell.
        rowspan_found = False
        down = row_index + 1
        while down < nrows:
            below = rows[down][col_index]
            if below is None:
                break
            text = below.text
            if text is not None:
                if self.RE_row_span_marker.match(text):
                    # The end marker was found, so include this cell
                    # in the rowspan and set the vertical alignment.
                    cell.rowspan += 1
                    valign = self._valign_from_marker(text)
                    if valign is not None:
                        cell.valign = valign
                    rowspan_found = True
                    break
                elif text.strip(' ') != '':
                    break
            cell.rowspan += 1
            down += 1
        # Without a terminating marker the empty cells below stay as they are.
        if not rowspan_found:
            cell.rowspan = 1

        # Replace all collapsed cells with None.
        for r in range(cell.rowspan):
            for c in range(cell.colspan):
                if r == 0 and c == 0:
                    # Keep the current cell.
                    continue
                rows[row_index + r][col_index + c] = None

    def _parse_row(self, row, align):
        """Split a row and create one Cell per column.

        *align* rather than the split result drives the loop to ensure
        every row contains the same number of columns; missing cells are
        created from the empty string.
        """
        texts = self._split_row(row)
        return [
            Cell(texts[i] if i < len(texts) else '', a)
            for i, a in enumerate(align)
        ]

    def _split_row(self, row):
        """Split a row of text into a list of cell texts.

        Reads ``self.border``, so the border flags must be set for the
        current block before this is called.
        """
        row = row.strip(' ')
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            row = self.RE_END_BORDER.sub('', row)
        else:
            # Add space characters in front and back to avoid mistaking empty
            # cells at the beginning or end as cells that have to be collapsed.
            row = ' ' + row + ' '
        return self._split(row)

    def _split(self, row):
        """Split a row of text with some code into a list of cells.

        Pipes inside backtick-delimited code regions and escaped pipes are
        not treated as cell delimiters.
        """
        elements = []
        pipes = []
        tics = []
        tic_points = []
        tic_region = []
        good_pipes = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            try:
                tic_size = tics[pos] - tic_points[pos][2]
                if tic_size == 0:
                    raise ValueError
                index = tics[pos + 1:].index(tic_size) + 1
                tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                pos += index + 1
            except ValueError:
                pos += 1

        # Resolve pipes. Check if they are within a tic pair region.
        # Walk through pipes comparing them to each region.
        #     - If pipe position is less than a region, it isn't in a region
        #     - If it is within a region, we don't want it, so throw it out
        #     - If we didn't throw it out, it must be a table pipe
        for pipe in pipes:
            throw_out = False
            for region in tic_region:
                if pipe < region[0]:
                    # Pipe is not in a region
                    break
                elif region[0] <= pipe <= region[1]:
                    # Pipe is within a code region. Throw it out.
                    throw_out = True
                    break
            if not throw_out:
                good_pipes.append(pipe)

        # Split row according to table delimiters.
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
class TableExtension(Extension):
    """Markdown extension that installs the extended table processor."""

    def extendMarkdown(self, md):
        """Hook a TableProcessor into the block parser of *md*.

        Also makes sure the pipe character is listed among the escapable
        characters of the Markdown instance.
        """
        escaped_chars = md.ESCAPED_CHARS
        if '|' not in escaped_chars:
            escaped_chars.append('|')
        # NOTE(review): priority 75.1 presumably places this processor just
        # ahead of the stock tables processor - confirm against Python-Markdown.
        processor = TableProcessor(md.parser)
        md.parser.blockprocessors.register(processor, 'tables_extended', 75.1)
def makeExtension(**kwargs):  # pragma: no cover
    """Build and return a TableExtension, forwarding all keyword arguments."""
    extension = TableExtension(**kwargs)
    return extension