initial brillig more aggressiveness loop unrolling work,still a draft…

… and needs tests
noir-lang · vezenovm · Jan 29, 2025 · Jan 29, 2025 · Jan 30, 2025 · Jan 30, 2025
commit c779b6cfc0ead5e5d2aa5bcbb3e65f02002af1f2
diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs
@@ -174,7 +174,11 @@ fn optimize_all(builder: SsaBuilder, options: &SsaEvaluatorOptions) -> Result<Ss
         .run_pass(Ssa::loop_invariant_code_motion, "Loop Invariant Code Motion")
         .try_run_pass(
             |ssa| ssa.unroll_loops_iteratively(options.max_bytecode_increase_percent),
-            "Unrolling",
+            "Unrolling (1st)",
+        )?
+        .try_run_pass(
+            |ssa| ssa.unroll_loops_iteratively(options.max_bytecode_increase_percent),
+            "Unrolling (2nd)",
         )?
         .run_pass(Ssa::simplify_cfg, "Simplifying (2nd)")
         .run_pass(Ssa::mem2reg, "Mem2Reg (3rd)")

diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -76,7 +76,9 @@
             // more finessing to convince the borrow checker that it's okay to share a read-only reference
             // to the globals and a mutable reference to the function at the same time, both part of the `Ssa`.
             if has_unrolled && is_brillig {
-                dbg!(max_bytecode_increase_percent.clone());
+                // TODO: As we unroll more loops, this is potentially going to lead to panics
+                // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved
+                // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators).
                 if let Some(max_incr_pct) = max_bytecode_increase_percent {
                     if global_cache.is_none() {
                         let globals = (*function.dfg.globals).clone();
@@ -144,6 +146,7 @@
     }
 }
 
+#[derive(Debug, Clone)]
 pub(super) struct Loop {
     /// The header block of a loop is the block which dominates all the
     /// other blocks in the loop.
@@ -222,8 +225,6 @@
         // their loop range. We will start popping loops from the back.
         loops.sort_by_key(|loop_| loop_.blocks.len());
 
-        // dbg!(loops.clone());
-
         Self {
             failed_to_unroll: im::HashSet::default(),
             yet_to_unroll: loops,
@@ -654,22 +655,21 @@
     /// Count the number of increments to the induction variable.
     /// It should be one, but it can be duplicated.
     /// The increment should be in the block where the back-edge was found.
-    fn count_induction_increments(&self, function: &Function) -> usize {
-        let back = &function.dfg[self.back_edge_start];
-        let header = &function.dfg[self.header];
-        let induction_var = header.parameters()[0];
-
-        back.instructions()
-            .iter()
-            .filter(|instruction| {
-                let instruction = &function.dfg[**instruction];
-                matches!(instruction,
-                    Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ })
-                        if *lhs == induction_var
-                )
-            })
-            .count()
-    }
+    // fn count_induction_increments(&self, function: &Function) -> usize {
+    //     let back = &function.dfg[self.back_edge_start];
+    //     let header = &function.dfg[self.header];
+    //     let induction_var = header.parameters()[0];
+    //     back.instructions()
+    //         .iter()
+    //         .filter(|instruction| {
+    //             let instruction = &function.dfg[**instruction];
+    //             matches!(instruction,
+    //                 Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ })
+    //                     if *lhs == induction_var
+    //             )
+    //         })
+    //         .count()
+    // }
 
     /// Decide if this loop is small enough that it can be inlined in a way that the number
     /// of unrolled instructions times the number of iterations would result in smaller bytecode
@@ -678,6 +678,8 @@
         self.boilerplate_stats(function, cfg).map(|s| s.is_small()).unwrap_or_default()
     }
 
+    // TODO: unify this method with the one in the loop invariant pass
+    // probably can unify some other code too
     fn values_defined_in_loop(&self, function: &Function) -> HashSet<ValueId> {
         let mut defined_in_loop = HashSet::default();
         for block in self.blocks.iter() {
@@ -706,24 +708,17 @@
     ) -> usize {
         let mut useless_instructions = 0;
         let induction_var = self.get_induction_variable(function);
-        let mut new_induction_var = induction_var;
         for block in &self.blocks {
             for instruction in function.dfg[*block].instructions() {
                 let results = function.dfg.instruction_results(*instruction);
                 let instruction = &function.dfg[*instruction];
-                // TODO: unify how all these instructions are checked
+                // TODO: unify how all these instructions are analyzed
                 match instruction {
                     Instruction::Load { address } if refs.contains(address) => {
-                        if !defined_in_loop.contains(&address) {
+                        if !defined_in_loop.contains(address) {
                             defined_in_loop.remove(&results[0]);
                         }
                     }
-                    Instruction::Store { value, .. } => {
-                        if *value == new_induction_var || *value == induction_var {
-                            dbg!("got here");
-                            return 0;
-                        }
-                    }
                     Instruction::ArrayGet { array, index } => {
                         let index_is_useless = induction_var == *index
                             || function.dfg.is_constant(*index)
@@ -733,7 +728,7 @@
                             useless_instructions += 1;
                         }
                     }
-                    Instruction::ArraySet { array, index, value, mutable } => {
+                    Instruction::ArraySet { array, index, value, .. } => {
                         let index_is_useless = induction_var == *index
                             || function.dfg.is_constant(*index)
                             || !defined_in_loop.contains(index);
@@ -746,11 +741,6 @@
                         }
                     }
                     Instruction::Binary(_) => {
-                        // TODO: need to check each of these values better
-                        // It is "useless" when:
-                        //  - We have a constant
-                        //  - It is an induction variable
-                        //  - It has not been defined in the loop
                         let mut is_useless = true;
                         instruction.for_each_value(|value| {
                             is_useless &= !defined_in_loop.contains(&value)
@@ -762,12 +752,6 @@
                             useless_instructions += 1;
                         }
                     }
-                    Instruction::Cast(value, _) => {
-                        if *value == induction_var {
-                            dbg!("got here");
-                            new_induction_var = results[0];
-                        }
-                    }
                     _ => {}
                 }
             }
@@ -787,6 +771,10 @@
         let upper = upper.try_to_u64()?;
         let refs = self.find_pre_header_reference_values(function, cfg)?;
 
+        // If we have a break block, we can potentially directly use the induction variable in that break.
+        // If we then unroll the loop, the induction variable will not exist anymore.
+        // TODO: we should appropriately unroll and account for the break
+        // TODO 2: move this logic out into its own method
         let induction_var = self.get_induction_variable(function);
         let mut uses_induction_var_outside = false;
         for block in self.blocks.iter() {
@@ -806,14 +794,13 @@
         }
 
         let defined_in_loop = self.values_defined_in_loop(function);
+
         let useless_instructions = if uses_induction_var_outside {
             0
         } else {
             self.redefine_useful_results(function, defined_in_loop, &refs)
         };
-        // let useless_instructions = self.redefine_useful_results(function, defined_in_loop, &refs);
-        dbg!(self.blocks.clone());
-        dbg!(useless_instructions);
+
         let (loads, stores) = self.count_loads_and_stores(function, &refs);
         let all_instructions = self.count_all_instructions(function);
 
@@ -871,15 +858,15 @@
     /// Estimated number of _useful_ instructions, which is the ones in the loop
     /// minus all in-loop boilerplate.
     fn useful_instructions(&self) -> usize {
-        // Two jumps
+        // Boilerplate only includes two jumps for induction variable comparison
+        // and the back edge jump to the header.
         // The comparison with the upper bound and the increments to the induction variable
         // are included in the `useless_instructions`
         let boilerplate = 2;
         // Be conservative and only assume that mem2reg gets rid of load followed by store.
         // NB we have not checked that these are actual pairs.
         let load_and_store = self.loads.min(self.stores) * 2;
         self.all_instructions - load_and_store - boilerplate - self.useless_instructions
-        // self.all_instructions - self.increments - load_and_store - boilerplate
     }
 
     /// Estimated number of instructions if we unroll the loop.
@@ -892,10 +879,6 @@
     /// the blocks in tact with all the boilerplate involved in jumping, and the extra
     /// reference access instructions.
     fn is_small(&self) -> bool {
-        // dbg!(self.useful_instructions());
-        // dbg!(self.iterations);
-        // dbg!(self.unrolled_instructions());
-        // dbg!(self.baseline_instructions());
         self.unrolled_instructions() < self.baseline_instructions()
     }
 }
@@ -1197,7 +1180,7 @@
    use super::{is_new_size_ok, BoilerplateStats, Loops};

    /// Tries to unroll all loops in each SSA function once, calling the `Function` directly,
    /// bypassing the iterative loop done by the SSA which does further optimisations.
    ///
    /// If any loop cannot be unrolled, it is left as-is or in a partially unrolled state.
    fn try_unroll_loops(mut ssa: Ssa) -> (Ssa, Vec<RuntimeError>) {

diff --git a/test_programs/execution_success/global_var_regression_simple/src/main.nr b/test_programs/execution_success/global_var_regression_simple/src/main.nr
@@ -7,19 +7,19 @@ fn main(x: Field, y: pub Field) {
             acc += EXPONENTIATE[i][j];
         }
     }
-    assert(!acc.lt(x));
+    // assert(!acc.lt(x));
     assert(x != y);
 
-    dummy_again(x, y);
+    // dummy_again(x, y);
 }
 
-fn dummy_again(x: Field, y: Field) {
-    let mut acc: Field = 0;
-    for i in 0..2 {
-        for j in 0..2 {
-            acc += EXPONENTIATE[i][j];
-        }
-    }
-    assert(!acc.lt(x));
-    assert(x != y);
-}
+// fn dummy_again(x: Field, y: Field) {
+//     let mut acc: Field = 0;
+//     for i in 0..2 {
+//         for j in 0..2 {
+//             acc += EXPONENTIATE[i][j];
+//         }
+//     }
+//     assert(!acc.lt(x));
+//     assert(x != y);
+// }
diff --git a/test_programs/execution_success/unroll_loop_regression/Nargo.toml b/test_programs/execution_success/unroll_loop_regression/Nargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "unroll_loop_regression"
+type = "bin"
+authors = [""]
+
+[dependencies]
diff --git a/test_programs/execution_success/unroll_loop_regression/Prover.toml b/test_programs/execution_success/unroll_loop_regression/Prover.toml
@@ -0,0 +1 @@
+fracs = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
diff --git a/test_programs/execution_success/unroll_loop_regression/src/main.nr b/test_programs/execution_success/unroll_loop_regression/src/main.nr
@@ -0,0 +1,27 @@
+global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, 15, 16];
+
+unconstrained fn main(
+    fracs: [Field; 16],
+) -> pub [Field; 2] {
+    let mut partial_sums: [Field; 2] = std::mem::zeroed();
+
+    let mut partial_sum: Field = 0;
+    for i in 0..8 {
+        let summand = ROOTS[i] * fracs[i];
+        partial_sum = partial_sum + summand;
+    }
+    partial_sums[0] = partial_sum;
+
+    let NUM_PARTIAL_SUMS = 2;
+    for i in 1..NUM_PARTIAL_SUMS {
+        let mut partial_sum: Field = partial_sums[i - 1];
+        for j in 0..8 {
+            let k = i * 8 + j;
+            let summand = ROOTS[k]* fracs[k];
+            partial_sum = partial_sum + summand;
+        }
+        partial_sums[i] = partial_sum;
+    }
+
+    partial_sums
+}
diff --git a/tooling/nargo_cli/build.rs b/tooling/nargo_cli/build.rs
@@ -198,8 +198,8 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner
         nargo.arg("--force-brillig");
 
         // Set the maximum increase so that part of the optimization is exercised (it might fail).
-        nargo.arg("--max-bytecode-increase-percent");
-        nargo.arg("50");
+        // nargo.arg("--max-bytecode-increase-percent");
+        // nargo.arg("50");
     }}
 
     {test_content}

diff --git a/compiler/noirc_evaluator/src/brillig/brillig_ir/entry_point.rs b/compiler/noirc_evaluator/src/brillig/brillig_ir/entry_point.rs
        }

        for (i, bit_size) in arguments.iter().flat_map(flat_bit_sizes).enumerate() {
            // Calldatacopy tags everything with field type, so when downcast when necessary
            if bit_size < F::max_num_bits() {
                self.cast_instruction(
                    SingleAddrVariable::new(
diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
            // These can fail.
            Constrain(..) | ConstrainNotEqual(..) | RangeCheck { .. } => true,

            // This should never be side-effectful
            MakeArray { .. } | Noop => false,

            // Some binary math can overflow or underflow
diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs
    Or,
    /// Bitwise xor (^)
    Xor,
    /// Bitshift left (<<)
    Shl,
    /// Bitshift right (>>)
    Shr,
 }

        }

        let operator = if lhs_type == NumericType::NativeField {
            // Unchecked operations between fields or bools don't make sense, so we convert those to non-unchecked
            // to reduce noise and confusion in the generated SSA.
            match operator {
                BinaryOp::Add { unchecked: true } => BinaryOp::Add { unchecked: false },
                _ => operator,
            }
        } else if lhs_type == NumericType::bool() {
            // Unchecked mul between bools doesn't make sense, so we convert that to non-unchecked
            if let BinaryOp::Mul { unchecked: true } = operator {
                BinaryOp::Mul { unchecked: false }
            } else {
                }
                if lhs_type == NumericType::bool() {
                    // Boolean AND is equivalent to multiplication, which is a cheaper operation.
                    // (mul unchecked because these are bools so it doesn't matter really)
                    let instruction =
                        Instruction::binary(BinaryOp::Mul { unchecked: true }, lhs, rhs);
                    return SimplifyResult::SimplifiedToInstruction(instruction);
diff --git a/compiler/noirc_evaluator/src/ssa/opt/pure.rs b/compiler/noirc_evaluator/src/ssa/opt/pure.rs
        // Set of functions we call which the purity result depends on.
        // `is_pure` is intended to be called on each function, building
        // up a call graph of sorts to check afterwards to propagate impurity
        // from called functions to their callers. Resultingly, an initial "Pure"
        // result here could be overridden by one of these dependencies being impure.
        let mut dependencies = BTreeSet::new();

diff --git a/compiler/noirc_frontend/src/parser/parser/expression.rs b/compiler/noirc_frontend/src/parser/parser/expression.rs
    ///     = bool
    ///     | int
    ///     | str
    ///     | rawstr
    ///     | fmtstr
    ///     | QuoteExpression
    ///     | ArrayExpression
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		fracs = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
	}

	for (i, bit_size) in arguments.iter().flat_map(flat_bit_sizes).enumerate() {
	// Calldatacopy tags everything with field type, so when downcast when necessary
Check warning on line 182 in compiler/noirc_evaluator/src/brillig/brillig_ir/entry_point.rs GitHub Actions / Code `Unknown word (Calldatacopy)`
	if bit_size < F::max_num_bits() {
	self.cast_instruction(
	SingleAddrVariable::new(
	// These can fail.
	Constrain(..) \| ConstrainNotEqual(..) \| RangeCheck { .. } => true,

	// This should never be side-effectful
Check warning on line 426 in compiler/noirc_evaluator/src/ssa/ir/instruction.rs GitHub Actions / Code `Unknown word (effectful)`
	MakeArray { .. } \| Noop => false,

	// Some binary math can overflow or underflow
	Or,
	/// Bitwise xor (^)
	Xor,
	/// Bitshift left (<<)
Check warning on line 42 in compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs GitHub Actions / Code `Unknown word (Bitshift)`
	Shl,
	/// Bitshift right (>>)
Check warning on line 44 in compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs GitHub Actions / Code `Unknown word (Bitshift)`
	Shr,
	}

	}

	let operator = if lhs_type == NumericType::NativeField {
	// Unchecked operations between fields or bools don't make sense, so we convert those to non-unchecked
Check warning on line 110 in compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs GitHub Actions / Code `Unknown word (bools)`
	// to reduce noise and confusion in the generated SSA.
	match operator {
	BinaryOp::Add { unchecked: true } => BinaryOp::Add { unchecked: false },
	_ => operator,
	}
	} else if lhs_type == NumericType::bool() {
	// Unchecked mul between bools doesn't make sense, so we convert that to non-unchecked
Check warning on line 119 in compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs GitHub Actions / Code `Unknown word (bools)`
	if let BinaryOp::Mul { unchecked: true } = operator {
	BinaryOp::Mul { unchecked: false }
	} else {
	}
	if lhs_type == NumericType::bool() {
	// Boolean AND is equivalent to multiplication, which is a cheaper operation.
	// (mul unchecked because these are bools so it doesn't matter really)
Check warning on line 287 in compiler/noirc_evaluator/src/ssa/ir/instruction/binary.rs GitHub Actions / Code `Unknown word (bools)`
	let instruction =
	Instruction::binary(BinaryOp::Mul { unchecked: true }, lhs, rhs);
	return SimplifyResult::SimplifiedToInstruction(instruction);
	// Set of functions we call which the purity result depends on.
	// `is_pure` is intended to be called on each function, building
	// up a call graph of sorts to check afterwards to propagate impurity
	// from called functions to their callers. Resultingly, an initial "Pure"
Check warning on line 116 in compiler/noirc_evaluator/src/ssa/opt/pure.rs GitHub Actions / Code `Unknown word (Resultingly)`
	// result here could be overridden by one of these dependencies being impure.
	let mut dependencies = BTreeSet::new();
	/// = bool
	/// \| int
	/// \| str
	/// \| rawstr
Check warning on line 604 in compiler/noirc_frontend/src/parser/parser/expression.rs GitHub Actions / Code `Unknown word (rawstr)`
	/// \| fmtstr
	/// \| QuoteExpression
	/// \| ArrayExpression