Added comments to lexer, linker, and parser (#2)
BuildTools committed Jun 1, 2022
1 parent 61c526b commit cc878d1
Showing 3 changed files with 302 additions and 107 deletions.
97 changes: 96 additions & 1 deletion mlogpp/lexer.py
@@ -5,6 +5,10 @@
from .formatting import Format

class TokenType(Enum):
"""
token types
"""

NONE = 0
ID = 2
STRING = 3
@@ -22,6 +26,7 @@ class TokenType(Enum):
LOGIC = 15
DOT = 16

# token regexes
LEX_REGEXES = {
TokenType.ID: re.compile(r"^[a-zA-Z_@][a-zA-Z_0-9]*$"),
TokenType.STRING: re.compile("^\"([^\"\\\\]|\\\\.)*\"$"),
@@ -41,7 +46,15 @@ class TokenType(Enum):
}

class Position:
"""
token position
"""

def __init__(self, line: int, column: int, cline: str, len_: int):
"""
token position
"""

self.line = line
self.column = column
self.cline = cline
@@ -57,23 +70,48 @@ def __eq__(self, other) -> bool:
return self.line == other.line and self.column == other.column

class Token:
"""
token
"""

def __init__(self, type_: TokenType, value: str, line: int, col: int, cline: str):
"""
token
"""

self.type = type_
self.value = value
self.line = line
self.col = col
self.cline = cline

@staticmethod
def _sanitize(s: str) -> str:
"""
sanitize a string
"""

return s.replace("\n", "\\n").replace("\t", "\\t").replace("\r", "\\r")

def sanitize_value(self) -> str:
"""
sanitize the token value
"""

return Token._sanitize(self.value)

def sanitize_cline(self) -> str:
"""
sanitize the token's code line
"""

return Token._sanitize(self.cline)

def pos(self) -> Position:
"""
generate the position of the token
"""

return Position(self.line, self.col, self.cline, len(self.value))

def __repr__(self) -> str:
@@ -86,13 +124,24 @@ def __eq__(self, other) -> bool:
return self.type == other.type and self.value == other.value

class Lexer:
"""
splits code into tokens and handles preprocessing
"""

@staticmethod
def resolve_includes(code: str) -> str:
"""
resolve includes in code
"""

# iterate over lines
tmp = ""
for i, ln in enumerate(code.splitlines()):
# check if the line is an include
if ln.startswith("%"):
fn = ln[1:]

# check if the file exists
if not os.path.isfile(fn):
print(f"{Format.ERROR}Error on line {i + 1}: Cannot import file \"{fn}\"{Format.RESET}\n\nHere:\n{ln}\n{arrows.generate(0, len(ln))}")
sys.exit(1)
@@ -109,36 +158,53 @@ def resolve_includes(code: str) -> str:

@staticmethod
def lex(code: str) -> list:
"""
split code into tokens
"""

toks = []
tok = ""
in_str = False

for li, ln in enumerate(code.splitlines()):
st = ln.strip()

# continue if empty or comment
if not st or st.startswith("#"):
continue

# make into native code if a jump or label
if st.startswith(">") or st.startswith("<"):
ln = f".{st}"

# pass through as two tokens if native code
st = ln.strip()
if st.startswith("."):
ln = f".\"{st[1:]}\""

# iterate over characters in a line
prev = ""
for i, c in enumerate(ln):
if c == " " and not in_str:
# character is a space and not in a string

if Lexer._match(tok) == TokenType.NONE and tok.strip():
# token doesn't match anything and is not empty

print(f"{Format.ERROR}Error on line {li + 1}, column {i + 1}: Invalid token \"{tok}\"{Format.RESET}\n\nHere:\n{ln}\n{arrows.generate(i - len(tok), len(tok))}")
sys.exit(1)

elif tok.strip():
# token matches something and is not empty

toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""

continue

if c == "\"" and prev != "\\":
# character is `"` and previous character isn't `\`

in_str = not in_str

matched = Lexer._match(tok)
@@ -147,29 +213,40 @@ def lex(code: str) -> list:

matches = Lexer._match(tok)

# extend `==` token to `===`
if tok == "==" and i < len(ln) - 1:
if ln[i + 1] == "=":
toks.append(Token(TokenType.OPERATOR, "===", li + 1, i - len(tok), ln))
tok = ""
continue

# continue if a decimal number
elif c == "." and matched == TokenType.NUMBER and matches == TokenType.NONE:
continue


# token type changed
if ((matched != TokenType.NONE) and (matches == TokenType.NONE)) or ((matched != matches) and matched != TokenType.NONE):
if tok in ["+=", "-=", "*=", "/=", ">=", "<=", "==", "**"]:
# special token

toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""

else:
# normal token

tok = tok[:-1]
toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = c

prev = c

# workaround: drop a duplicate `=` token left over after a `===` token
if len(toks) >= 2:
if toks[-1].value == "=" and toks[-2].value == "===":
toks.pop()

# add last token if not empty and matches something
if Lexer._match(tok) != TokenType.NONE:
toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""
@@ -181,6 +258,10 @@ def lex(code: str) -> list:

@staticmethod
def _match(token: str) -> TokenType:
"""
match a token to a type
"""

for t, r in LEX_REGEXES.items():
if r.fullmatch(token):
return t
@@ -189,6 +270,10 @@ def _match(token: str) -> TokenType:

@staticmethod
def _matchtp(tokens: list, pos: int, pattern: list, *patterns: list) -> bool:
"""
match a token pattern
"""

for p in patterns:
if len(p) != len(pattern):
return False
@@ -206,12 +291,21 @@ def _matchtp(tokens: list, pos: int, pattern: list, *patterns: list) -> bool:

@staticmethod
def stringify_tokens(tokens: list) -> str:
"""
stringify a list of tokens
"""

return "\n".join([str(t) for t in tokens])

@staticmethod
def preprocess(tokens: list) -> list:
"""
preprocess a token list
"""

tmp = tokens.copy()

# find consts
consts = {}
found = True
while found:
@@ -231,6 +325,7 @@ def preprocess(tokens: list) -> list:

break

# replace consts
for i, t in enumerate(tmp):
for k, v in consts.items():
if t.value == k:
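
For reference, a minimal usage sketch of the lexing pipeline documented above. It is not part of the commit; the import path and the sample snippet are assumptions based on the file paths shown in this diff.

# Hypothetical usage sketch (assumes the package is importable as mlogpp)
from mlogpp.lexer import Lexer

source = "x = 1 + 2"                          # hypothetical mlog++ snippet, no includes
code = Lexer.resolve_includes(source)         # expand %file includes (none here)
tokens = Lexer.preprocess(Lexer.lex(code))    # lex into Token objects, then substitute consts
print(Lexer.stringify_tokens(tokens))         # one token per line
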
18 changes: 18 additions & 0 deletions mlogpp/linker.py
@@ -2,27 +2,45 @@
from .error import link_error

class Linker:
"""
links generated code together
"""

@staticmethod
def link(codes: list) -> str:
"""
links generated code together
"""

offset = 0
tmp = []
for code in codes:
# relocate jumps
c, o = Linker._relocate(code, offset)

tmp.append(c)
offset += o

return "\n".join(tmp)

@staticmethod
def _relocate(code: str, offset: int) -> tuple:
"""
relocate jumps in compiled code
"""

tmp = ""
nl = 0
for ln in code.strip().splitlines():
# check if line is a jump
if ln.startswith("jump "):
spl = ln.split(" ")

# check if jump has enough arguments
if len(spl) != 5:
link_error(Position(nl, 0, ln, len(ln)), "Invalid jump instruction")

# check if jump address is valid
pos = spl[1]
try:
pos = int(pos)
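
A minimal sketch of how the linker might be driven, under the assumption (suggested but not fully shown in this diff) that _relocate shifts absolute jump targets by the number of instructions emitted before each block; the import path and the mlog fragments are illustrative only.

# Hypothetical usage sketch (assumes the package is importable as mlogpp)
from mlogpp.linker import Linker

block_a = "set x 1\njump 0 always 0 0"   # two instructions; jump targets line 0 of this block
block_b = "print x\njump 0 always 0 0"   # after linking, this jump should point past block_a
print(Linker.link([block_a, block_b]))   # blocks are concatenated with jump targets relocated
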
