import pytest

from vyper.venom.mem_allocator import MemoryAllocator

MEM_BLOCK_ADDRESS = 0x1000


@pytest.fixture
def allocator():
    """A fresh 1 KiB allocator whose arena starts at MEM_BLOCK_ADDRESS."""
    return MemoryAllocator(1024, MEM_BLOCK_ADDRESS)


def _check_usage(allocator, free, allocated):
    # Free and allocated bytes must always partition the 1024-byte arena.
    assert allocator.get_free_memory() == free
    assert allocator.get_allocated_memory() == allocated


def test_initial_state(allocator):
    _check_usage(allocator, 1024, 0)


def test_single_allocation(allocator):
    assert allocator.allocate(256) == MEM_BLOCK_ADDRESS
    _check_usage(allocator, 768, 256)


def test_multiple_allocations(allocator):
    addrs = [allocator.allocate(size) for size in (256, 128, 64)]

    # consecutive allocations are laid out contiguously, first-fit
    assert addrs == [MEM_BLOCK_ADDRESS, MEM_BLOCK_ADDRESS + 256, MEM_BLOCK_ADDRESS + 384]
    _check_usage(allocator, 576, 448)


def test_deallocation(allocator):
    first = allocator.allocate(256)
    second = allocator.allocate(128)

    assert allocator.deallocate(first) is True
    _check_usage(allocator, 896, 128)

    assert allocator.deallocate(second) is True
    _check_usage(allocator, 1024, 0)


def test_allocation_after_deallocation(allocator):
    first = allocator.allocate(256)
    allocator.deallocate(first)

    # the freed block at the arena start is reused for the next request
    assert allocator.allocate(128) == MEM_BLOCK_ADDRESS
    _check_usage(allocator, 896, 128)


def test_out_of_memory(allocator):
    allocator.allocate(1000)
    with pytest.raises(MemoryError):
        allocator.allocate(100)


def test_invalid_deallocation(allocator):
    # an address that was never handed out is rejected
    assert allocator.deallocate(0x2000) is False


def test_fragmentation_and_merging(allocator):
    first, second, third = (allocator.allocate(256) for _ in range(3))
    _check_usage(allocator, 256, 768)

    allocator.deallocate(first)
    _check_usage(allocator, 512, 512)

    allocator.deallocate(third)
    _check_usage(allocator, 768, 256)

    # the freed third block merges with the 256-byte tail, fitting 512 bytes
    fourth = allocator.allocate(512)
    assert fourth == MEM_BLOCK_ADDRESS + 512
    _check_usage(allocator, 256, 768)

    allocator.deallocate(second)
    _check_usage(allocator, 512, 512)

    allocator.deallocate(fourth)
    _check_usage(allocator, 1024, 0)  # all blocks merged back into one

    # the whole arena must be allocatable again in a single piece
    assert allocator.allocate(1024) == MEM_BLOCK_ADDRESS
    _check_usage(allocator, 0, 1024)
test_exact_fit_allocation(allocator): + addr1 = allocator.allocate(1024) + assert addr1 == MEM_BLOCK_ADDRESS + assert allocator.get_free_memory() == 0 + assert allocator.get_allocated_memory() == 1024 + + allocator.deallocate(addr1) + addr2 = allocator.allocate(1024) + assert addr2 == MEM_BLOCK_ADDRESS + assert allocator.get_free_memory() == 0 + assert allocator.get_allocated_memory() == 1024 diff --git a/vyper/exceptions.py b/vyper/exceptions.py index c69163b561..7b98818fcd 100644 --- a/vyper/exceptions.py +++ b/vyper/exceptions.py @@ -400,10 +400,6 @@ class CodegenPanic(VyperInternalException): """Invalid code generated during codegen phase""" -class StackTooDeep(CodegenPanic): - """Stack too deep""" # (should not happen) - - class UnexpectedNodeType(VyperInternalException): """Unexpected AST node type.""" @@ -424,6 +420,15 @@ class InvalidABIType(VyperInternalException): """An internal routine constructed an invalid ABI type""" +class UnreachableStackException(VyperException): + + """An unreachable stack operation was encountered.""" + + def __init__(self, message, op): + self.op = op + super().__init__(message) + + @contextlib.contextmanager def tag_exceptions(node, fallback_exception_type=CompilerPanic, note=None): try: diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 593a9556a9..436c1306cc 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -6,12 +6,14 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.basicblock import IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom from vyper.venom.passes import ( SCCP, AlgebraicOptimizationPass, + AllocaElimination, BranchOptimizationPass, DFTPass, FloatAllocas, @@ -19,6 +21,7 @@ Mem2Var, RemoveUnusedVariablesPass, SimplifyCFGPass, + Stack2Mem, 
StoreElimination, StoreExpansionPass, ) @@ -51,6 +54,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: FloatAllocas(ac, fn).run_pass() SimplifyCFGPass(ac, fn).run_pass() + AllocaElimination(ac, fn).run_pass() MakeSSA(ac, fn).run_pass() Mem2Var(ac, fn).run_pass() MakeSSA(ac, fn).run_pass() @@ -70,6 +74,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: StoreExpansionPass(ac, fn).run_pass() DFTPass(ac, fn).run_pass() + Stack2Mem(ac, fn).run_pass() def generate_ir(ir: IRnode, optimize: OptimizationLevel) -> IRContext: diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 0b0252d976..caca12f8ca 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -2,6 +2,7 @@ from vyper.venom.basicblock import IRInstruction, IRLabel, IROperand from vyper.venom.function import IRFunction +from vyper.venom.mem_allocator import MemoryAllocator class IRContext: @@ -10,6 +11,7 @@ class IRContext: immutables_len: Optional[int] data_segment: list[IRInstruction] last_label: int + mem_allocator: MemoryAllocator def __init__(self) -> None: self.functions = {} @@ -17,6 +19,9 @@ def __init__(self) -> None: self.immutables_len = None self.data_segment = [] self.last_label = 0 + self.mem_allocator = MemoryAllocator( + 4096, 0x100000 + ) # TODO: Should get this from the original IR def add_function(self, fn: IRFunction) -> None: fn.ctx = self diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 0c48c9740e..85eec4dce3 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -1,7 +1,9 @@ from typing import Iterator, Optional from vyper.codegen.ir_node import IRnode -from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRVariable +from vyper.utils import OrderedSet +from vyper.venom.basicblock import CFG_ALTERING_INSTRUCTIONS, IRBasicBlock, IRLabel, IRVariable +from vyper.venom.mem_allocator import MemoryAllocator class IRFunction: @@ -15,6 +17,7 @@ class IRFunction: last_label: int 
class MemoryBlock:
    """A contiguous run of bytes inside a MemoryAllocator arena."""

    size: int  # length of the run, in bytes
    address: int  # offset relative to the allocator's start_address
    is_free: bool  # True while the run is available for allocation

    def __init__(self, size: int, address: int):
        self.size = size
        self.address = address
        self.is_free = True

    def __repr__(self) -> str:
        state = "free" if self.is_free else "used"
        return f"MemoryBlock(address={self.address:#x}, size={self.size}, {state})"


class MemoryAllocator:
    """
    First-fit allocator over a fixed-size memory arena.

    Blocks are kept in a list ordered by (arena-relative) address.
    Allocation splits the first free block that is large enough;
    deallocation marks the block free and coalesces adjacent free blocks.
    No compaction is performed, so a fragmented arena can fail a request
    even when the total free memory would suffice.
    """

    total_size: int  # arena size in bytes
    start_address: int  # absolute address of the arena start
    blocks: list[MemoryBlock]  # address-ordered, covers the whole arena

    def __init__(self, total_size: int, start_address: int):
        self.total_size = total_size
        self.start_address = start_address
        # a single free block spanning the whole arena
        self.blocks = [MemoryBlock(total_size, 0)]

    def allocate(self, size: int) -> int:
        """
        Reserve `size` bytes and return the absolute address of the reservation.

        Raises ValueError for non-positive sizes (a zero-size request would
        otherwise create a degenerate 0-byte block) and MemoryError when no
        single free block is large enough.
        """
        if size <= 0:
            raise ValueError(f"allocation size must be positive, got {size}")
        for block in self.blocks:
            if block.is_free and block.size >= size:
                if block.size > size:
                    # split: the remainder stays free immediately after this block
                    remainder = MemoryBlock(block.size - size, block.address + size)
                    self.blocks.insert(self.blocks.index(block) + 1, remainder)
                    block.size = size
                block.is_free = False
                return self.start_address + block.address
        raise MemoryError(
            f"Memory allocation failed for size {size} with free memory {self.get_free_memory()}"
        )

    def deallocate(self, address: int) -> bool:
        """
        Release the reservation starting at absolute `address`.

        Returns True on success, False if `address` does not start a
        currently-allocated block (unknown address, or a double free).
        """
        relative_address = address - self.start_address
        for block in self.blocks:
            if block.address == relative_address:
                if block.is_free:
                    return False  # double free
                block.is_free = True
                self._merge_adjacent_free_blocks()
                return True
        return False  # invalid address

    def _merge_adjacent_free_blocks(self) -> None:
        # Coalesce neighbouring free blocks so that large requests can be
        # satisfied again after fragmentation.
        i = 0
        while i < len(self.blocks) - 1:
            if self.blocks[i].is_free and self.blocks[i + 1].is_free:
                self.blocks[i].size += self.blocks[i + 1].size
                self.blocks.pop(i + 1)
            else:
                i += 1

    def get_free_memory(self) -> int:
        """Total bytes currently free (possibly split across several blocks)."""
        return sum(block.size for block in self.blocks if block.is_free)

    def get_allocated_memory(self) -> int:
        """Total bytes currently allocated."""
        return sum(block.size for block in self.blocks if not block.is_free)
{_size.value}") diff --git a/vyper/venom/passes/stack2mem.py b/vyper/venom/passes/stack2mem.py new file mode 100644 index 0000000000..31db891ee6 --- /dev/null +++ b/vyper/venom/passes/stack2mem.py @@ -0,0 +1,70 @@ +from vyper.exceptions import UnreachableStackException +from vyper.venom.analysis.cfg import CFGAnalysis +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis +from vyper.venom.basicblock import IRInstruction, IRLiteral, IRVariable +from vyper.venom.mem_allocator import MemoryAllocator +from vyper.venom.passes.base_pass import IRPass +from vyper.venom.venom_to_assembly import VenomCompiler + + +class Stack2Mem(IRPass): + mem_allocator: MemoryAllocator + + def run_pass(self): + fn = self.function + self.mem_allocator = self.function.ctx.mem_allocator + self.analyses_cache.request_analysis(CFGAnalysis) + dfg = self.analyses_cache.request_analysis(DFGAnalysis) + self.analyses_cache.request_analysis(LivenessAnalysis) + + while True: + compiler = VenomCompiler([fn.ctx]) + try: + compiler.generate_evm() + break + except Exception as e: + if isinstance(e, UnreachableStackException): + self._demote_variable(dfg, e.op) + self.analyses_cache.force_analysis(LivenessAnalysis) + else: + break + + self.analyses_cache.invalidate_analysis(DFGAnalysis) + + def _demote_variable(self, dfg: DFGAnalysis, var: IRVariable): + """ + Demote a stack variable to memory operations. 
+ """ + uses = dfg.get_uses(var) + def_inst = dfg.get_producing_instruction(var) + + # Allocate memory for this variable + mem_addr = self.mem_allocator.allocate(32) + + if def_inst is not None: + self._insert_mstore_after(def_inst, mem_addr) + + for inst in uses: + self._insert_mload_before(inst, mem_addr, var) + + def _insert_mstore_after(self, inst: IRInstruction, mem_addr: int): + bb = inst.parent + idx = bb.instructions.index(inst) + assert inst.output is not None + # mem_var = IRVariable(f"mem_{mem_addr}") + # bb.insert_instruction( + # IRInstruction("alloca", [IRLiteral(mem_addr), 32], mem_var), idx + 1 + # ) + new_var = self.function.get_next_variable() + bb.insert_instruction(IRInstruction("mstore", [new_var, IRLiteral(mem_addr)]), idx + 1) + inst.output = new_var + + def _insert_mload_before(self, inst: IRInstruction, mem_addr: int, var: IRVariable): + bb = inst.parent + idx = bb.instructions.index(inst) + new_var = self.function.get_next_variable() + load_inst = IRInstruction("mload", [IRLiteral(mem_addr)]) + load_inst.output = new_var + bb.insert_instruction(load_inst, idx) + inst.replace_operands({var: new_var}) diff --git a/vyper/venom/stack_model.py b/vyper/venom/stack_model.py index e284b41fb2..c0447cf0e2 100644 --- a/vyper/venom/stack_model.py +++ b/vyper/venom/stack_model.py @@ -63,23 +63,30 @@ def get_phi_depth(self, phis: list[IRVariable]) -> int: def peek(self, depth: int) -> IROperand: """ - Returns the top of the stack map. + Returns the depth-th element from the top of the stack. """ assert depth is not StackModel.NOT_IN_STACK, "Cannot peek non-in-stack depth" + assert depth <= 0, "Cannot peek positive depth" return self._stack[depth - 1] def poke(self, depth: int, op: IROperand) -> None: """ - Pokes an operand at the given depth in the stack map. + Pokes an operand at the given depth in the stack. 
""" assert depth is not StackModel.NOT_IN_STACK, "Cannot poke non-in-stack depth" - assert depth <= 0, "Bad depth" + assert depth <= 0, "Cannot poke positive depth" assert isinstance(op, IROperand), f"{type(op)}: {op}" self._stack[depth - 1] = op + def top(self) -> IROperand: + """ + Returns the top of the stack. + """ + return self.peek(0) + def dup(self, depth: int) -> None: """ - Duplicates the operand at the given depth in the stack map. + Duplicates the operand at the given depth in the stack. """ assert depth is not StackModel.NOT_IN_STACK, "Cannot dup non-existent operand" assert depth <= 0, "Cannot dup positive depth" @@ -87,7 +94,7 @@ def dup(self, depth: int) -> None: def swap(self, depth: int) -> None: """ - Swaps the operand at the given depth in the stack map with the top of the stack. + Swaps the operand at the given depth in the stack with the top of the stack. """ assert depth is not StackModel.NOT_IN_STACK, "Cannot swap non-existent operand" assert depth < 0, "Cannot swap positive depth" diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 9b52b842ba..c525f9be28 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,6 +1,6 @@ from typing import Any -from vyper.exceptions import CompilerPanic, StackTooDeep +from vyper.exceptions import CompilerPanic, UnreachableStackException from vyper.ir.compile_ir import ( PUSH, DataHeader, @@ -364,7 +364,13 @@ def _generate_evm_for_instruction( if opcode in ["jmp", "djmp", "jnz", "invoke"]: operands = list(inst.get_non_label_operands()) - elif opcode in ("alloca", "palloca"): + elif opcode == "alloca": + raise Exception("Alloca at assembly generation is not valid") + offset, _size = inst.operands + offset = inst.parent.parent._mem_allocator.allocate(_size.value) + # print(f"Allocated {offset} for alloca {_size}") + operands = [offset] + elif opcode == "palloca": offset, _size = inst.operands operands = [offset] @@ -582,22 +588,21 @@ def 
_generate_evm_for_instruction( return apply_line_numbers(inst, assembly) - def pop(self, assembly, stack, num=1): + def pop(self, assembly, stack: StackModel, num=1): stack.pop(num) assembly.extend(["POP"] * num) - def swap(self, assembly, stack, depth) -> int: + def swap(self, assembly, stack: StackModel, depth) -> int: # Swaps of the top is no op if depth == 0: return 0 - stack.swap(depth) - assembly.append(_evm_swap_for(depth)) + assembly.append(_evm_swap_for(depth, stack.top())) return 1 - def dup(self, assembly, stack, depth): + def dup(self, assembly, stack: StackModel, depth): stack.dup(depth) - assembly.append(_evm_dup_for(depth)) + assembly.append(_evm_dup_for(depth, stack.top())) def swap_op(self, assembly, stack, op): depth = stack.get_depth(op) @@ -610,15 +615,15 @@ def dup_op(self, assembly, stack, op): self.dup(assembly, stack, depth) -def _evm_swap_for(depth: int) -> str: +def _evm_swap_for(depth: int, op: IROperand) -> str: swap_idx = -depth if not (1 <= swap_idx <= 16): - raise StackTooDeep(f"Unsupported swap depth {swap_idx}") + raise UnreachableStackException(f"Unsupported swap depth {swap_idx} ({op})", op) return f"SWAP{swap_idx}" -def _evm_dup_for(depth: int) -> str: +def _evm_dup_for(depth: int, op: IROperand) -> str: dup_idx = 1 - depth if not (1 <= dup_idx <= 16): - raise StackTooDeep(f"Unsupported dup depth {dup_idx}") + raise UnreachableStackException(f"Unsupported dup depth {dup_idx} ({op})", op) return f"DUP{dup_idx}"