Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

modifying TeX.py and Tokenizer.py to work with Python 3.7 changes #58

Open
wants to merge 1 commit into
base: gerby
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 37 additions & 33 deletions plasTeX/TeX.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,47 +318,51 @@ def __iter__(self):
ELEMENT_NODE = Macro.ELEMENT_NODE

while 1:
# Get the next token
token = next()
try:
# Get the next token
token = next()

# Token is null, ignore it
if token is None:
continue
# Token is null, ignore it
if token is None:
continue

# Macro that has already been expanded
elif token.nodeType == ELEMENT_NODE:
pass
# Macro that has already been expanded
elif token.nodeType == ELEMENT_NODE:
pass

# We need to expand this one
elif token.macroName is not None:
try:
# By default, invoke() should put the macro instance
# itself into the output stream. We'll handle this
# automatically here if `None' is received. If you
# really don't want anything in the output stream,
# just return `[ ]'.
obj = createElement(token.macroName)
obj.contextDepth = token.contextDepth
obj.parentNode = token.parentNode
tokens = obj.invoke(self)
if tokens is None:
# We need to expand this one
elif token.macroName is not None:
try:
# By default, invoke() should put the macro instance
# itself into the output stream. We'll handle this
# automatically here if `None' is received. If you
# really don't want anything in the output stream,
# just return `[ ]'.
obj = createElement(token.macroName)
obj.contextDepth = token.contextDepth
obj.parentNode = token.parentNode
tokens = obj.invoke(self)
if tokens is None:
# log.info('expanding %s %s', token.macroName, obj)
pushToken(obj)
elif tokens:
pushToken(obj)
elif tokens:
# log.info('expanding %s %s', token.macroName, ''.join([x.source for x in tokens]))
pushTokens(tokens)
continue
except Exception as message:
msg = str(message)
if msg.strip():
msg = ' (%s)' % msg.strip()
log.error('Error while expanding "%s"%s%s',
token.macroName, self.lineInfo, msg)
raise
pushTokens(tokens)
continue
except Exception as message:
msg = str(message)
if msg.strip():
msg = ' (%s)' % msg.strip()
log.error('Error while expanding "%s"%s%s',
token.macroName, self.lineInfo, msg)
raise

# tokenlog.debug('%s: %s', type(token), token.ownerDocument)

yield token
yield token
except StopIteration:
return


def createSubProcess(self):
"""
Expand Down
229 changes: 117 additions & 112 deletions plasTeX/Tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,38 +278,41 @@ def _read1():
return read(1)

while True:
token = _read1()
try:
token = _read1()

if not token:
break

if token == '\n':
self.lineNumber += 1
self.context.meta["lines"] += 1
if not token:
break

code = whichCode(token)
if token == '\n':
self.lineNumber += 1
self.context.meta["lines"] += 1

if code == CC_SUPER:
code = whichCode(token)

# Handle characters like ^^M, ^^@, etc.
next_char = _read1()
if code == CC_SUPER:

if next_char != token:
self.pushChar(next_char)
else:
# Handle characters like ^^M, ^^@, etc.
next_char = _read1()
num = ord(next_char)
if num >= 64:
token = chr(num-64)
else:
token = chr(num+64)
code = whichCode(token)

# Just go to the next character if you see one of these...
if code == CC_IGNORED or code == CC_INVALID:
continue
if next_char != token:
self.pushChar(next_char)
else:
next_char = _read1()
num = ord(next_char)
if num >= 64:
token = chr(num-64)
else:
token = chr(num+64)
code = whichCode(token)

# Just go to the next character if you see one of these...
if code == CC_IGNORED or code == CC_INVALID:
continue

yield classes[code](token)
yield classes[code](token)
except StopIteration:
return

def pushChar(self, char):
"""
Expand Down Expand Up @@ -377,122 +380,124 @@ def __iter__(self):
prev = None

while 1:
try:
# Purge mybuffer first
while mybuffer:
yield mybuffer.pop(0)

# Purge mybuffer first
while mybuffer:
yield mybuffer.pop(0)

# Get the next character
token = next(charIter)
# Get the next character
token = next(charIter)

if token.nodeType == ELEMENT_NODE:
raise ValueError('Expanded tokens should never make it here')
if token.nodeType == ELEMENT_NODE:
raise ValueError('Expanded tokens should never make it here')

code = token.catcode
code = token.catcode

# Short circuit letters and other since they are so common
if code == CC_LETTER or code == CC_OTHER:
self.state = STATE_M
# Short circuit letters and other since they are so common
if code == CC_LETTER or code == CC_OTHER:
self.state = STATE_M

# Whitespace
elif code == CC_SPACE:
if self.state == STATE_S or self.state == STATE_N:
continue
self.state = STATE_S
token = Space(' ')

# End of line
elif code == CC_EOL:
state = self.state
if state == STATE_S:
self.state = STATE_N
continue
elif state == STATE_M:
token = Space(' ')
code = CC_SPACE
self.state = STATE_N
elif state == STATE_N:
# ord(token) != 10 is the same as saying token != '\n'
# but it is much faster.
if ord(token) != 10:
self.lineNumber += 1
self.readline()
token = EscapeSequence('par')
# Prevent adjacent paragraphs
if prev == token:
# Whitespace
elif code == CC_SPACE:
if self.state == STATE_S or self.state == STATE_N:
continue
code = CC_ESCAPE

# Escape sequence
elif code == CC_ESCAPE:

# Get name of command sequence
self.state = STATE_M

for token in charIter:

if token.catcode == CC_LETTER:
word = [token]
for t in charIter:
if t.catcode == CC_LETTER:
word.append(t)
else:
pushChar(t)
break
token = EscapeSequence(''.join(word))
self.state = STATE_S
token = Space(' ')

elif token.catcode == CC_EOL:
#pushChar(token)
#token = EscapeSequence()
# End of line
elif code == CC_EOL:
state = self.state
if state == STATE_S:
self.state = STATE_N
continue
elif state == STATE_M:
token = Space(' ')
self.state = STATE_S
code = CC_SPACE
self.state = STATE_N
elif state == STATE_N:
# ord(token) != 10 is the same as saying token != '\n'
# but it is much faster.
if ord(token) != 10:
self.lineNumber += 1
self.readline()
token = EscapeSequence('par')
# Prevent adjacent paragraphs
if prev == token:
continue
code = CC_ESCAPE

# Escape sequence
elif code == CC_ESCAPE:

# Get name of command sequence
self.state = STATE_M

for token in charIter:

if token.catcode == CC_LETTER:
word = [token]
for t in charIter:
if t.catcode == CC_LETTER:
word.append(t)
else:
pushChar(t)
break
token = EscapeSequence(''.join(word))

elif token.catcode == CC_EOL:
#pushChar(token)
#token = EscapeSequence()
token = Space(' ')
self.state = STATE_S

else:
token = EscapeSequence(token)
else:
token = EscapeSequence(token)
#
# Because we can implement macros both in LaTeX and Python, we don't
# always want the whitespace to be eaten. For example, implementing
# \chardef\%=`% would be \char{`%} in TeX, but in Python it's just
# another macro class that would eat whitespace incorrectly. So we
# have to do this kind of thing in the parse() method of Macro.
#
if token.catcode != CC_EOL:
if token.catcode != CC_EOL:
# HACK: I couldn't get the parse() thing to work so I'm just not
# going to parse whitespace after EscapeSequences that end in
# non-letter characters as a half-assed solution.
if token[-1] in encoding.stringletters():
# Absorb following whitespace
self.state = STATE_S
if token[-1] in encoding.stringletters():
# Absorb following whitespace
self.state = STATE_S

break
break

else: token = EscapeSequence()

else: token = EscapeSequence()
# Check for any \let aliases
token = context.lets.get(token, token)

# Check for any \let aliases
token = context.lets.get(token, token)
# TODO: This action should be generalized so that the
# tokens are processed recursively
if token is not token and token.catcode == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

# TODO: This action should be generalized so that the
# tokens are processed recursively
if token is not token and token.catcode == CC_COMMENT:
elif code == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

elif code == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

elif code == CC_ACTIVE:
token = EscapeSequence('active::%s' % token)
token = context.lets.get(token, token)
self.state = STATE_M
elif code == CC_ACTIVE:
token = EscapeSequence('active::%s' % token)
token = context.lets.get(token, token)
self.state = STATE_M

else:
self.state = STATE_M
else:
self.state = STATE_M

prev = token
prev = token

yield token
yield token
except StopIteration:
return