Added comments to lexer, linker, and parser (#2)
BuildTools committed Jun 1, 2022
1 parent 61c526b commit cc878d1
Showing 3 changed files with 302 additions and 107 deletions.
97 changes: 96 additions & 1 deletion mlogpp/lexer.py
@@ -5,6 +5,10 @@
from .formatting import Format

class TokenType(Enum):
"""
token types
"""

NONE = 0
ID = 2
STRING = 3
@@ -22,6 +26,7 @@ class TokenType(Enum):
LOGIC = 15
DOT = 16

# token regexes
LEX_REGEXES = {
TokenType.ID: re.compile(r"^[a-zA-Z_@][a-zA-Z_0-9]*$"),
TokenType.STRING: re.compile("^\"([^\"\\\\]|\\\\.)*\"$"),
@@ -41,7 +46,15 @@ class TokenType(Enum):
}

class Position:
"""
token position
"""

def __init__(self, line: int, column: int, cline: str, len_: int):
"""
token position
"""

self.line = line
self.column = column
self.cline = cline
@@ -57,23 +70,48 @@ def __eq__(self, other) -> bool:
return self.line == other.line and self.column == other.column

class Token:
"""
token
"""

def __init__(self, type_: TokenType, value: str, line: int, col: int, cline: str):
"""
token
"""

self.type = type_
self.value = value
self.line = line
self.col = col
self.cline = cline

@staticmethod
def _sanitize(s: str) -> str:
"""
sanitize a string
"""

return s.replace("\n", "\\n").replace("\t", "\\t").replace("\r", "\\r")

def sanitize_value(self) -> str:
"""
sanitize the token value
"""

return Token._sanitize(self.value)

def sanitize_cline(self) -> str:
"""
sanitize the token's code line
"""

return Token._sanitize(self.cline)

def pos(self) -> Position:
"""
generate the position of the token
"""

return Position(self.line, self.col, self.cline, len(self.value))

def __repr__(self) -> str:
@@ -86,13 +124,24 @@ def __eq__(self, other) -> bool:
return self.type == other.type and self.value == other.value

class Lexer:
"""
splits code into tokens and handles preprocessing
"""

@staticmethod
def resolve_includes(code: str) -> str:
"""
resolve includes in code
"""

# iterate over lines
tmp = ""
for i, ln in enumerate(code.splitlines()):
# check if the line is an include
if ln.startswith("%"):
fn = ln[1:]

# check if the file exists
if not os.path.isfile(fn):
print(f"{Format.ERROR}Error on line {i + 1}: Cannot import file \"{fn}\"{Format.RESET}\n\nHere:\n{ln}\n{arrows.generate(0, len(ln))}")
sys.exit(1)
@@ -109,36 +158,53 @@ def resolve_includes(code: str) -> str:

@staticmethod
def lex(code: str) -> list:
"""
split code into tokens
"""

toks = []
tok = ""
in_str = False

for li, ln in enumerate(code.splitlines()):
st = ln.strip()

# continue if empty or comment
if not st or st.startswith("#"):
continue

# make into native code if a jump or label
if st.startswith(">") or st.startswith("<"):
ln = f".{st}"

# pass through as two tokens if native code
st = ln.strip()
if st.startswith("."):
ln = f".\"{st[1:]}\""

# iterate over characters in a line
prev = ""
for i, c in enumerate(ln):
if c == " " and not in_str:
# character is a space and not in a string

if Lexer._match(tok) == TokenType.NONE and tok.strip():
# token doesn't match anything and is not empty

print(f"{Format.ERROR}Error on line {li + 1}, column {i + 1}: Invalid token \"{tok}\"{Format.RESET}\n\nHere:\n{ln}\n{arrows.generate(i - len(tok), len(tok))}")
sys.exit(1)

elif tok.strip():
# token matches something and is not empty

toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""

continue

if c == "\"" and prev != "\\":
# character is `"` and previous character isn't `\`

in_str = not in_str

matched = Lexer._match(tok)
@@ -147,29 +213,40 @@ def lex(code: str) -> list:

matches = Lexer._match(tok)

# extend `==` token to `===`
if tok == "==" and i < len(ln) - 1:
if ln[i + 1] == "=":
toks.append(Token(TokenType.OPERATOR, "===", li + 1, i - len(tok), ln))
tok = ""
continue

# continue if a decimal number
elif c == "." and matched == TokenType.NUMBER and matches == TokenType.NONE:
continue


# token type changed
if ((matched != TokenType.NONE) and (matches == TokenType.NONE)) or ((matched != matches) and matched != TokenType.NONE):
if tok in ["+=", "-=", "*=", "/=", ">=", "<=", "==", "**"]:
# special token

toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""

else:
# normal token

tok = tok[:-1]
toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = c

prev = c

# workaround: drop a duplicate `=` token left over after a `===` token
if len(toks) >= 2:
if toks[-1].value == "=" and toks[-2].value == "===":
toks.pop()

# add last token if not empty and matches something
if Lexer._match(tok) != TokenType.NONE:
toks.append(Token(Lexer._match(tok), tok, li + 1, i - len(tok), ln))
tok = ""
@@ -181,6 +258,10 @@ def lex(code: str) -> list:

@staticmethod
def _match(token: str) -> TokenType:
"""
match a token to a type
"""

for t, r in LEX_REGEXES.items():
if r.fullmatch(token):
return t
@@ -189,6 +270,10 @@ def _match(token: str) -> TokenType:

@staticmethod
def _matchtp(tokens: list, pos: int, pattern: list, *patterns: list) -> bool:
"""
match a token pattern
"""

for p in patterns:
if len(p) != len(pattern):
return False
@@ -206,12 +291,21 @@ def _matchtp(tokens: list, pos: int, pattern: list, *patterns: list) -> bool:

@staticmethod
def stringify_tokens(tokens: list) -> str:
"""
stringify a list of tokens
"""

return "\n".join([str(t) for t in tokens])

@staticmethod
def preprocess(tokens: list) -> list:
"""
preprocess a token list
"""

tmp = tokens.copy()

# find consts
consts = {}
found = True
while found:
@@ -231,6 +325,7 @@ def preprocess(tokens: list) -> list:

break

# replace consts
for i, t in enumerate(tmp):
for k, v in consts.items():
if t.value == k:
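
For reference, a minimal usage sketch of the lexing pipeline documented above. It is not part of the commit; the import path and the sample snippet are assumptions based on the file paths shown in this diff.

# Hypothetical usage sketch (assumes the package is importable as mlogpp)
from mlogpp.lexer import Lexer

source = "x = 1 + 2"                          # hypothetical mlog++ snippet, no includes
code = Lexer.resolve_includes(source)         # expand %file includes (none here)
tokens = Lexer.preprocess(Lexer.lex(code))    # lex into Token objects, then substitute consts
print(Lexer.stringify_tokens(tokens))         # one token per line
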
18 changes: 18 additions & 0 deletions mlogpp/linker.py
@@ -2,27 +2,45 @@
from .error import link_error

class Linker:
"""
links generated code together
"""

@staticmethod
def link(codes: list) -> str:
"""
links generated code together
"""

offset = 0
tmp = []
for code in codes:
# relocate jumps
c, o = Linker._relocate(code, offset)

tmp.append(c)
offset += o

return "\n".join(tmp)

@staticmethod
def _relocate(code: str, offset: int) -> tuple:
"""
relocate jumps in compiled code
"""

tmp = ""
nl = 0
for ln in code.strip().splitlines():
# check if line is a jump
if ln.startswith("jump "):
spl = ln.split(" ")

# check if jump has enough arguments
if len(spl) != 5:
link_error(Position(nl, 0, ln, len(ln)), "Invalid jump instruction")

# check if jump address is valid
pos = spl[1]
try:
pos = int(pos)
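
A minimal sketch of how the linker might be driven, under the assumption (suggested but not fully shown in this diff) that _relocate shifts absolute jump targets by the number of instructions emitted before each block; the import path and the mlog fragments are illustrative only.

# Hypothetical usage sketch (assumes the package is importable as mlogpp)
from mlogpp.linker import Linker

block_a = "set x 1\njump 0 always 0 0"   # two instructions; jump targets line 0 of this block
block_b = "print x\njump 0 always 0 0"   # after linking, this jump should point past block_a
print(Linker.link([block_a, block_b]))   # blocks are concatenated with jump targets relocated
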
