Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

modifying TeX.py and Tokenizer.py to work with Python 3.7 changes #58

Open
wants to merge 1 commit into
base: gerby
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 37 additions & 33 deletions plasTeX/TeX.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,47 +318,51 @@ def __iter__(self):
ELEMENT_NODE = Macro.ELEMENT_NODE

while 1:
# Get the next token
token = next()
try:
# Get the next token
token = next()

# Token is null, ignore it
if token is None:
continue
# Token is null, ignore it
if token is None:
continue

# Macro that has already been expanded
elif token.nodeType == ELEMENT_NODE:
pass
# Macro that has already been expanded
elif token.nodeType == ELEMENT_NODE:
pass

# We need to expand this one
elif token.macroName is not None:
try:
# By default, invoke() should put the macro instance
# itself into the output stream. We'll handle this
# automatically here if `None' is received. If you
# really don't want anything in the output stream,
# just return `[ ]'.
obj = createElement(token.macroName)
obj.contextDepth = token.contextDepth
obj.parentNode = token.parentNode
tokens = obj.invoke(self)
if tokens is None:
# We need to expand this one
elif token.macroName is not None:
try:
# By default, invoke() should put the macro instance
# itself into the output stream. We'll handle this
# automatically here if `None' is received. If you
# really don't want anything in the output stream,
# just return `[ ]'.
obj = createElement(token.macroName)
obj.contextDepth = token.contextDepth
obj.parentNode = token.parentNode
tokens = obj.invoke(self)
if tokens is None:
# log.info('expanding %s %s', token.macroName, obj)
pushToken(obj)
elif tokens:
pushToken(obj)
elif tokens:
# log.info('expanding %s %s', token.macroName, ''.join([x.source for x in tokens]))
pushTokens(tokens)
continue
except Exception as message:
msg = str(message)
if msg.strip():
msg = ' (%s)' % msg.strip()
log.error('Error while expanding "%s"%s%s',
token.macroName, self.lineInfo, msg)
raise
pushTokens(tokens)
continue
except Exception as message:
msg = str(message)
if msg.strip():
msg = ' (%s)' % msg.strip()
log.error('Error while expanding "%s"%s%s',
token.macroName, self.lineInfo, msg)
raise

# tokenlog.debug('%s: %s', type(token), token.ownerDocument)

yield token
yield token
except StopIteration:
return


def createSubProcess(self):
"""
Expand Down
229 changes: 117 additions & 112 deletions plasTeX/Tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,38 +278,41 @@ def _read1():
return read(1)

while True:
token = _read1()
try:
token = _read1()

if not token:
break

if token == '\n':
self.lineNumber += 1
self.context.meta["lines"] += 1
if not token:
break

code = whichCode(token)
if token == '\n':
self.lineNumber += 1
self.context.meta["lines"] += 1

if code == CC_SUPER:
code = whichCode(token)

# Handle characters like ^^M, ^^@, etc.
next_char = _read1()
if code == CC_SUPER:

if next_char != token:
self.pushChar(next_char)
else:
# Handle characters like ^^M, ^^@, etc.
next_char = _read1()
num = ord(next_char)
if num >= 64:
token = chr(num-64)
else:
token = chr(num+64)
code = whichCode(token)

# Just go to the next character if you see one of these...
if code == CC_IGNORED or code == CC_INVALID:
continue
if next_char != token:
self.pushChar(next_char)
else:
next_char = _read1()
num = ord(next_char)
if num >= 64:
token = chr(num-64)
else:
token = chr(num+64)
code = whichCode(token)

# Just go to the next character if you see one of these...
if code == CC_IGNORED or code == CC_INVALID:
continue

yield classes[code](token)
yield classes[code](token)
except StopIteration:
return

def pushChar(self, char):
"""
Expand Down Expand Up @@ -377,122 +380,124 @@ def __iter__(self):
prev = None

while 1:
try:
# Purge mybuffer first
while mybuffer:
yield mybuffer.pop(0)

# Purge mybuffer first
while mybuffer:
yield mybuffer.pop(0)

# Get the next character
token = next(charIter)
# Get the next character
token = next(charIter)

if token.nodeType == ELEMENT_NODE:
raise ValueError('Expanded tokens should never make it here')
if token.nodeType == ELEMENT_NODE:
raise ValueError('Expanded tokens should never make it here')

code = token.catcode
code = token.catcode

# Short circuit letters and other since they are so common
if code == CC_LETTER or code == CC_OTHER:
self.state = STATE_M
# Short circuit letters and other since they are so common
if code == CC_LETTER or code == CC_OTHER:
self.state = STATE_M

# Whitespace
elif code == CC_SPACE:
if self.state == STATE_S or self.state == STATE_N:
continue
self.state = STATE_S
token = Space(' ')

# End of line
elif code == CC_EOL:
state = self.state
if state == STATE_S:
self.state = STATE_N
continue
elif state == STATE_M:
token = Space(' ')
code = CC_SPACE
self.state = STATE_N
elif state == STATE_N:
# ord(token) != 10 is the same as saying token != '\n'
# but it is much faster.
if ord(token) != 10:
self.lineNumber += 1
self.readline()
token = EscapeSequence('par')
# Prevent adjacent paragraphs
if prev == token:
# Whitespace
elif code == CC_SPACE:
if self.state == STATE_S or self.state == STATE_N:
continue
code = CC_ESCAPE

# Escape sequence
elif code == CC_ESCAPE:

# Get name of command sequence
self.state = STATE_M

for token in charIter:

if token.catcode == CC_LETTER:
word = [token]
for t in charIter:
if t.catcode == CC_LETTER:
word.append(t)
else:
pushChar(t)
break
token = EscapeSequence(''.join(word))
self.state = STATE_S
token = Space(' ')

elif token.catcode == CC_EOL:
#pushChar(token)
#token = EscapeSequence()
# End of line
elif code == CC_EOL:
state = self.state
if state == STATE_S:
self.state = STATE_N
continue
elif state == STATE_M:
token = Space(' ')
self.state = STATE_S
code = CC_SPACE
self.state = STATE_N
elif state == STATE_N:
# ord(token) != 10 is the same as saying token != '\n'
# but it is much faster.
if ord(token) != 10:
self.lineNumber += 1
self.readline()
token = EscapeSequence('par')
# Prevent adjacent paragraphs
if prev == token:
continue
code = CC_ESCAPE

# Escape sequence
elif code == CC_ESCAPE:

# Get name of command sequence
self.state = STATE_M

for token in charIter:

if token.catcode == CC_LETTER:
word = [token]
for t in charIter:
if t.catcode == CC_LETTER:
word.append(t)
else:
pushChar(t)
break
token = EscapeSequence(''.join(word))

elif token.catcode == CC_EOL:
#pushChar(token)
#token = EscapeSequence()
token = Space(' ')
self.state = STATE_S

else:
token = EscapeSequence(token)
else:
token = EscapeSequence(token)
#
# Because we can implement macros both in LaTeX and Python, we don't
# always want the whitespace to be eaten. For example, implementing
# \chardef\%=`% would be \char{`%} in TeX, but in Python it's just
# another macro class that would eat whitespace incorrectly. So we
# have to do this kind of thing in the parse() method of Macro.
#
if token.catcode != CC_EOL:
if token.catcode != CC_EOL:
# HACK: I couldn't get the parse() thing to work so I'm just not
# going to parse whitespace after EscapeSequences that end in
# non-letter characters as a half-assed solution.
if token[-1] in encoding.stringletters():
# Absorb following whitespace
self.state = STATE_S
if token[-1] in encoding.stringletters():
# Absorb following whitespace
self.state = STATE_S

break
break

else: token = EscapeSequence()

else: token = EscapeSequence()
# Check for any \let aliases
token = context.lets.get(token, token)

# Check for any \let aliases
token = context.lets.get(token, token)
# TODO: This action should be generalized so that the
# tokens are processed recursively
if token is not token and token.catcode == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

# TODO: This action should be generalized so that the
# tokens are processed recursively
if token is not token and token.catcode == CC_COMMENT:
elif code == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

elif code == CC_COMMENT:
self.readline()
self.lineNumber += 1
self.state = STATE_N
continue

elif code == CC_ACTIVE:
token = EscapeSequence('active::%s' % token)
token = context.lets.get(token, token)
self.state = STATE_M
elif code == CC_ACTIVE:
token = EscapeSequence('active::%s' % token)
token = context.lets.get(token, token)
self.state = STATE_M

else:
self.state = STATE_M
else:
self.state = STATE_M

prev = token
prev = token

yield token
yield token
except StopIteration:
return