From f77cbc2c7cb8a240b4a908b06038be382253371c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20Gro=C3=9F?= Date: Tue, 7 Jan 2025 14:33:50 +0100 Subject: [PATCH] Introduce DataFlowSimplifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a generic reducer that attempts to simplify the data flow of a program. For example, for the following program: v3 <- Foo v4 <- Bar v3 v5 <- Baz v4 And assuming that only `Baz` is the relevant operation, it would attempt to simplify it to just: v3 <- Foo v5 <- Baz v3 This should generally be a useful simplification, but is in particular helpful to remove named variables from the program if the names are not important. In the future, we could also likely replace the special-purpose ReassignReducer with this generic reducer. Change-Id: I402451e3ad4b301381ba4929d28a008f7ac93670 Reviewed-on: https://chrome-internal-review.googlesource.com/c/v8/fuzzilli/+/7934188 Reviewed-by: Carl Smith Commit-Queue: Samuel Groß --- .../Minimization/DataFlowSimplifier.swift | 88 ++++++++++++ ...ucer.swift => InstructionSimplifier.swift} | 2 +- ...LoopReducer.swift => LoopSimplifier.swift} | 2 +- .../Minimization/MinimizationHelper.swift | 2 +- Sources/Fuzzilli/Minimization/Minimizer.swift | 2 +- .../Minimization/ReassignReducer.swift | 4 + Tests/FuzzilliTests/MinimizerTest.swift | 134 +++++++++++++++--- 7 files changed, 207 insertions(+), 27 deletions(-) create mode 100644 Sources/Fuzzilli/Minimization/DataFlowSimplifier.swift rename Sources/Fuzzilli/Minimization/{SimplifyingReducer.swift => InstructionSimplifier.swift} (99%) rename Sources/Fuzzilli/Minimization/{LoopReducer.swift => LoopSimplifier.swift} (99%) diff --git a/Sources/Fuzzilli/Minimization/DataFlowSimplifier.swift b/Sources/Fuzzilli/Minimization/DataFlowSimplifier.swift new file mode 100644 index 000000000..eaf125385 --- /dev/null +++ b/Sources/Fuzzilli/Minimization/DataFlowSimplifier.swift @@ -0,0 +1,88 @@ +// Copyright 2025 Google LLC 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Simplifies the data flow of a program. +/// +/// This essentially attempts to remove intermediate instructions in a data flow chain if they are not important. For example: +/// +/// v3 <- Foo +/// v4 <- Bar v3 +/// v5 <- Baz v4 +/// +/// If `Baz` is the interesting operation here, the `Bar` may be unnecessary and so we simplify this program to +/// +/// v3 <- Foo +/// v5 <- Baz v3 +/// +/// By removing `Bar` and replacing all uses of its output with one of its inputs. We assume that one of the inputs +/// is probably the best fit for a replacement. For cases such as `CreateNamedVariable` or `Reassign`, using +/// an input is definitely the right choice. In other cases, such as arithmetic operations, the input should at least have +/// roughly the right type. +struct DataFlowSimplifier: Reducer { + func reduce(with helper: MinimizationHelper) { + // Compute all candidates: intermediate operations in a data flow chain. + var candidates = [Int]() + var uses = VariableMap() + for instr in helper.code { + for input in instr.inputs { + uses[input]? += 1 + } + + // For now, we only consider simple instructions as candidates. + guard instr.isSimple else { continue } + // The instruction must have at least one output and one input, + // otherwise it wouldn't be an intermediate node. 
+ guard instr.numOutputs > 0 else { continue } + guard instr.numInputs > 0 else { continue } + + candidates.append(instr.index) + for output in instr.outputs { + uses[output] = 0 + } + } + + // Remove those candidates whose outputs aren't used. + candidates = candidates.filter({ helper.code[$0].allOutputs.map({ uses[$0]! }).reduce(0, +) > 0 }) + + // Finally try to remove each remaining candidate. + for candidate in candidates { + var newCode = Code() + var replacements = VariableMap() + for instr in helper.code { + if instr.index == candidate { + assert(instr.numInputs > 0) + assert(instr.numOutputs > 0) + // Pick a random input as replacement. Here we could attempt to be smarter and + // for example find an input that seems more fitting, or we could try to apply + // some heuristic, such as using the input with the most uses itself. + let replacement = chooseUniform(from: instr.inputs) + for output in instr.allOutputs { + assert(uses.contains(output)) + replacements[output] = replacement + } + assert(instr.allOutputs.map({ uses[$0]! }).reduce(0, +) > 0) + // Replace the instruction with a "compatible" Nop (same in- and outputs) + newCode.append(helper.nop(for: instr)) + } else { + // Keep this instruction but potentially change the inputs. + let newInouts = instr.inouts.map({ replacements[$0] ?? $0 }) + let newInstr = Instruction(instr.op, inouts: newInouts, flags: instr.flags) + newCode.append(newInstr) + } + } + helper.testAndCommit(newCode) + } + } +} + diff --git a/Sources/Fuzzilli/Minimization/SimplifyingReducer.swift b/Sources/Fuzzilli/Minimization/InstructionSimplifier.swift similarity index 99% rename from Sources/Fuzzilli/Minimization/SimplifyingReducer.swift rename to Sources/Fuzzilli/Minimization/InstructionSimplifier.swift index e918c0c59..6c6c60d5b 100644 --- a/Sources/Fuzzilli/Minimization/SimplifyingReducer.swift +++ b/Sources/Fuzzilli/Minimization/InstructionSimplifier.swift @@ -13,7 +13,7 @@ // limitations under the License. 
// Attempts to simplify "complex" instructions into simpler instructions. -struct SimplifyingReducer: Reducer { +struct InstructionSimplifier: Reducer { func reduce(with helper: MinimizationHelper) { simplifyFunctionDefinitions(with: helper) simplifyNamedInstructions(with: helper) diff --git a/Sources/Fuzzilli/Minimization/LoopReducer.swift b/Sources/Fuzzilli/Minimization/LoopSimplifier.swift similarity index 99% rename from Sources/Fuzzilli/Minimization/LoopReducer.swift rename to Sources/Fuzzilli/Minimization/LoopSimplifier.swift index 30cf2adde..11169adb2 100644 --- a/Sources/Fuzzilli/Minimization/LoopReducer.swift +++ b/Sources/Fuzzilli/Minimization/LoopSimplifier.swift @@ -18,7 +18,7 @@ /// - Loops with many iterations are replaced with loops with fewer iterations /// /// This reducer should be scheduled after the BlockReducer which attempts to delete loop entirely (instead of simplifying them). -struct LoopReducer: Reducer { +struct LoopSimplifier: Reducer { // The loop iterations counts that we'll try out when attempting to reduce the number of iterations of a loop. 
private let commonLoopIterationCounts = [5, 10, 25, 50, 100, 250, 500, 1000] diff --git a/Sources/Fuzzilli/Minimization/MinimizationHelper.swift b/Sources/Fuzzilli/Minimization/MinimizationHelper.swift index ad4a78403..bdf956aa0 100644 --- a/Sources/Fuzzilli/Minimization/MinimizationHelper.swift +++ b/Sources/Fuzzilli/Minimization/MinimizationHelper.swift @@ -138,7 +138,7 @@ class MinimizationHelper { } } - // Run the modified program and see if the patch changed its behaviour + // Run the modified program and see if the reduction altered its behaviour var stillHasAspects = false performOnFuzzerQueue { for _ in 0..() diff --git a/Tests/FuzzilliTests/MinimizerTest.swift b/Tests/FuzzilliTests/MinimizerTest.swift index ac80df718..005f14fc5 100644 --- a/Tests/FuzzilliTests/MinimizerTest.swift +++ b/Tests/FuzzilliTests/MinimizerTest.swift @@ -42,7 +42,8 @@ class MinimizerTests: XCTestCase { // Build input program to be minimized. var n1 = b.loadInt(42) let n2 = b.loadInt(43) - var n3 = b.binary(n1, n1, with: .Add) + // This will be removed and n3 replaced by n1 (an input of this instruction) + let n3 = b.binary(n1, n1, with: .Add) let n4 = b.binary(n2, n2, with: .Add) evaluator.nextInstructionIsImportant(in: b) @@ -60,11 +61,10 @@ class MinimizerTests: XCTestCase { // Build expected output program. 
n1 = b.loadInt(42) - n3 = b.binary(n1, n1, with: .Add) b.loadString("foo") bar = b.loadString("bar") o1 = b.createObject(with: [:]) - b.setComputedProperty(bar, of: o1, to: n3) + b.setComputedProperty(bar, of: o1, to: n1) let expectedProgram = b.finalize() @@ -90,7 +90,9 @@ class MinimizerTests: XCTestCase { obj.addMethod("m", with: .parameters(n: 1)) { args in let this = args[0] let prefix = b.loadString("Hello World from ") + evaluator.nextInstructionIsImportant(in: b) let name = b.getProperty("name", of: this) + evaluator.nextInstructionIsImportant(in: b) let msg = b.binary(prefix, name, with: .Add) evaluator.nextInstructionIsImportant(in: b) b.doReturn(msg) @@ -243,6 +245,7 @@ class MinimizerTests: XCTestCase { evaluator.nextInstructionIsImportant(in: b) cls.addInstanceMethod("m", with: .parameters(n: 0)) { args in let this = args[0] + evaluator.nextInstructionIsImportant(in: b) let v = b.getPrivateProperty("name", of: this) evaluator.nextInstructionIsImportant(in: b) b.doReturn(v) @@ -574,9 +577,7 @@ class MinimizerTests: XCTestCase { evaluator.nextInstructionIsImportant(in: b) b.setProperty("result", of: o, to: r) - // As we are not emulating the dataflow through the function call in our evaluator, the minimizer will try to remove the binary ops and integer loads - // as they do not directly flow into the property store. To avoid this, we simply mark all binary ops and integer loads as important in this program. - evaluator.operationIsImportant(LoadInteger.self) + // Make sure to keep the binary operations. evaluator.operationIsImportant(BinaryOperation.self) // We also need to keep the return instruction as long as the function still exists. However, once the function has been inlined, the return should also disappear. evaluator.keepReturnsInFunctions = true @@ -596,6 +597,7 @@ class MinimizerTests: XCTestCase { // Perform minimization and check that the two programs are equal. 
let actualProgram = minimize(originalProgram, with: fuzzer) + XCTAssertEqual(FuzzILLifter().lift(expectedProgram), FuzzILLifter().lift(actualProgram)) XCTAssertEqual(expectedProgram, actualProgram) } @@ -658,6 +660,7 @@ class MinimizerTests: XCTestCase { var o = b.createObject(with: [:]) let f1 = b.buildPlainFunction(with: .parameters(n: 1)) { args in b.loadString("unused1") + evaluator.nextInstructionIsImportant(in: b) let r = b.unary(.PostInc, args[0]) b.doReturn(r) } @@ -665,12 +668,14 @@ class MinimizerTests: XCTestCase { let f3 = b.buildPlainFunction(with: .parameters(n: 1)) { args in b.loadString("unused2") b.loadArguments() + evaluator.nextInstructionIsImportant(in: b) let r = b.unary(.PostDec, args[0]) b.doReturn(r) } b.loadString("unused3") let a1 = b.callFunction(f1, withArgs: [args[0]]) let a2 = b.callFunction(f3, withArgs: [args[1]]) + evaluator.nextInstructionIsImportant(in: b) let r = b.binary(a1, a2, with: .Add) b.doReturn(r) } @@ -776,6 +781,63 @@ class MinimizerTests: XCTestCase { XCTAssertEqual(expectedProgram, actualProgram) } + func testNamedVariableRemoval() { + let evaluator = EvaluatorForMinimizationTests() + let fuzzer = makeMockFuzzer(evaluator: evaluator) + let b = fuzzer.makeBuilder() + + // Build input program to be minimized. + var print = b.createNamedVariable(forBuiltin: "print") + var v1 = b.loadInt(42) + let n1 = b.createNamedVariable("n1", declarationMode: .var, initialValue: v1) + // These uses of n1 can be replaced with v1 + evaluator.nextInstructionIsImportant(in: b) + var s1 = b.binary(n1, n1, with: .Add) + evaluator.nextInstructionIsImportant(in: b) + b.callFunction(print, withArgs: [s1]) + + // Similar situation, but now the original input is also reused. 
+ var v2 = b.loadInt(43) + let n2 = b.createNamedVariable("n2", declarationMode: .var, initialValue: v2) + evaluator.nextInstructionIsImportant(in: b) + var s2 = b.binary(n2, v2, with: .Add) + evaluator.nextInstructionIsImportant(in: b) + b.callFunction(print, withArgs: [s2]) + + // Now the named variable itself is important and so shouldn't be removed. + evaluator.nextInstructionIsImportant(in: b) + var n3 = b.createNamedVariable("n3", declarationMode: .var, initialValue: n2) + // ... but this instruction can be removed (and s4 replaced with n3) + let s4 = b.binary(n3, n3, with: .Add) + evaluator.nextInstructionIsImportant(in: b) + b.callFunction(print, withArgs: [n3, s4, n3]) + + // This named variable can again be removed though. + let n4 = b.createNamedVariable("n4", declarationMode: .var, initialValue: n3) + evaluator.nextInstructionIsImportant(in: b) + b.callFunction(print, withArgs: [n1, n2, n3, n4]) + + let originalProgram = b.finalize() + + // Build expected output program. + print = b.createNamedVariable(forBuiltin: "print") + v1 = b.loadInt(42) + s1 = b.binary(v1, v1, with: .Add) + b.callFunction(print, withArgs: [s1]) + v2 = b.loadInt(43) + s2 = b.binary(v2, v2, with: .Add) + b.callFunction(print, withArgs: [s2]) + n3 = b.createNamedVariable("n3", declarationMode: .var, initialValue: v2) + b.callFunction(print, withArgs: [n3, n3, n3]) + b.callFunction(print, withArgs: [v1, v2, n3, n3]) + + let expectedProgram = b.finalize() + + // Perform minimization and check that the two programs are equal. 
+ let actualProgram = minimize(originalProgram, with: fuzzer) + XCTAssertEqual(expectedProgram, actualProgram) + } + func testSimpleLoopMinimization() { let evaluator = EvaluatorForMinimizationTests() let fuzzer = makeMockFuzzer(evaluator: evaluator) @@ -900,9 +962,10 @@ class MinimizerTests: XCTestCase { let f = b.createNamedVariable(forBuiltin: "f") var g = b.createNamedVariable(forBuiltin: "g") var h = b.createNamedVariable(forBuiltin: "h") + var limit = b.loadInt(100) // In this case, the for-loop is actually important (we emulate that by marking the EndForLoopAfterthought instruction as important b.buildForLoop(i: { evaluator.nextInstructionIsImportant(in: b); return b.callFunction(d) }, - { i in b.callFunction(e); return b.compare(i, with: b.loadInt(100), using: .lessThan) }, + { i in b.callFunction(e); evaluator.nextInstructionIsImportant(in: b); return b.compare(i, with: limit, using: .lessThan) }, { i in b.callFunction(f); evaluator.nextInstructionIsImportant(in: b); b.unary(.PostInc, i); evaluator.nextInstructionIsImportant(in: b) }) { i in evaluator.nextInstructionIsImportant(in: b) b.callFunction(g, withArgs: [i]) @@ -916,8 +979,9 @@ class MinimizerTests: XCTestCase { d = b.createNamedVariable(forBuiltin: "d") g = b.createNamedVariable(forBuiltin: "g") h = b.createNamedVariable(forBuiltin: "h") + limit = b.loadInt(100) b.buildForLoop(i: { return b.callFunction(d) }, - { i in b.compare(i, with: b.loadInt(100), using: .lessThan) }, + { i in b.compare(i, with: limit, using: .lessThan) }, { i in b.unary(.PostInc, i) }) { i in b.callFunction(g, withArgs: [i]) } @@ -1190,18 +1254,27 @@ class MinimizerTests: XCTestCase { let vars = b.destruct(o, selecting: ["foo", "bar", "baz"]) var print = b.createNamedVariable(forBuiltin: "print") evaluator.nextInstructionIsImportant(in: b) - b.callFunction(print, withArgs: [vars[1]]) + b.callFunction(print, withArgs: [vars[0], vars[1], vars[2]]) let originalProgram = b.finalize() // Build expected output program. 
o = b.createNamedVariable(forBuiltin: "TheObject") + let foo = b.getProperty("foo", of: o) let bar = b.getProperty("bar", of: o) + let baz = b.getProperty("baz", of: o) print = b.createNamedVariable(forBuiltin: "print") - b.callFunction(print, withArgs: [bar]) + b.callFunction(print, withArgs: [foo, bar, baz]) let expectedProgram = b.finalize() + // Here we rely on a quirk of the minimization evaluator: it only ensures that the sum + // of all important operations doesn't decrease, and so this will allow the DestructObject + // to be converted into GetProperty operations but prevent both the DestructObject and the + // GetProperty from being removed entirely. + evaluator.operationIsImportant(DestructObject.self) + evaluator.operationIsImportant(GetProperty.self) + // Perform minimization and check that the two programs are equal. let actualProgram = minimize(originalProgram, with: fuzzer) XCTAssertEqual(expectedProgram, actualProgram) @@ -1217,18 +1290,27 @@ class MinimizerTests: XCTestCase { let vars = b.destruct(o, selecting: [0, 3, 4]) var print = b.createNamedVariable(forBuiltin: "print") evaluator.nextInstructionIsImportant(in: b) - b.callFunction(print, withArgs: [vars[2]]) + b.callFunction(print, withArgs: [vars[0], vars[1], vars[2]]) let originalProgram = b.finalize() // Build expected output program. 
o = b.createNamedVariable(forBuiltin: "TheArray") - let bar = b.getElement(4, of: o) + let val0 = b.getElement(0, of: o) + let val3 = b.getElement(3, of: o) + let val4 = b.getElement(4, of: o) print = b.createNamedVariable(forBuiltin: "print") - b.callFunction(print, withArgs: [bar]) + b.callFunction(print, withArgs: [val0, val3, val4]) let expectedProgram = b.finalize() + // Here we rely on a quirk of the minimization evaluator: it only ensures that the sum + // of all important operations doesn't decrease, and so this will allow the DestructArray + // to be converted into GetElement operations but prevent both the DestructArray and the + // GetElement from being removed entirely. + evaluator.operationIsImportant(DestructArray.self) + evaluator.operationIsImportant(GetElement.self) + // Perform minimization and check that the two programs are equal. let actualProgram = minimize(originalProgram, with: fuzzer) XCTAssertEqual(expectedProgram, actualProgram) @@ -1289,14 +1371,14 @@ class MinimizerTests: XCTestCase { // Build input program to be minimized. let o = b.createNamedVariable(forBuiltin: "o") let f = b.createNamedVariable(forBuiltin: "f") - let v1 = b.getProperty("p1", of: o, guard: true) - let v2 = b.getElement(2, of: o, guard: true) - let v3 = b.getComputedProperty(b.loadString("p3"), of: o, guard: true) - let v4 = b.callFunction(f, guard: true) - let v5 = b.callMethod("m", on: o, guard: true) - let keepInputsAlive = b.createNamedVariable(forBuiltin: "keepInputsAlive") - evaluator.nextInstructionIsImportant(in: b) - b.callFunction(keepInputsAlive, withArgs: [v1, v2, v3, v4, v5]) + b.getProperty("p1", of: o, guard: true) + b.getElement(2, of: o, guard: true) + b.getComputedProperty(b.loadString("p3"), of: o, guard: true) + b.callFunction(f, guard: true) + b.callMethod("m", on: o, guard: true) + + // Make sure that none of the operations are removed. 
+ evaluator.operationsAreImportant([GetProperty.self, GetElement.self, GetComputedProperty.self, CallFunction.self, CallMethod.self]) let originalProgram = b.finalize() @@ -1367,8 +1449,14 @@ class MinimizerTests: XCTestCase { initialIndicesOfTheImportantInstructions.append(b.indexOfNextInstruction()) } - func operationIsImportant(_ op: T.Type) { - importantOperations.insert(T.name) + func operationIsImportant(_ op: Fuzzilli.Operation.Type) { + importantOperations.insert(op.name) + } + + func operationsAreImportant(_ ops: [Fuzzilli.Operation.Type]) { + for op in ops { + operationIsImportant(op) + } } func setOriginalProgram(_ program: Program) {