Skip to content

Commit

Permalink
Revert "cacheInputs propagates allocation only for matmul schedulers." (#3706)
Browse files Browse the repository at this point in the history

Reverts #3621 to fix #3701
  • Loading branch information
wujingyue authored and naoyam committed Jan 14, 2025
1 parent f481637 commit c3c2093
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 17 deletions.
3 changes: 1 addition & 2 deletions csrc/scheduler/ampere_multi_matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,7 @@ void AmpereMultipleMatmulScheduler::cacheInputsAndOutputs() {
scheduler_utils::clearMemorySpace(fusion_);

// Cache inputs
scheduler_utils::cacheInputs(
fusion_, /*unroll=*/true, /*propagate_allocation=*/true);
scheduler_utils::cacheInputs(fusion_, /*unroll=*/true);

// Cache and fork outputs
cached_outputs_ =
Expand Down
3 changes: 1 addition & 2 deletions csrc/scheduler/hopper_multi_matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ void HopperMultipleMatmulScheduler::cacheInputsAndOutputs() {
scheduler_utils::clearMemorySpace(fusion_);

// Cache inputs
scheduler_utils::cacheInputs(
fusion_, /*unroll=*/true, /*propagate_allocation=*/true);
scheduler_utils::cacheInputs(fusion_, /*unroll=*/true);

// Cache and fork outputs
scheduler_utils::cacheAndForkOutputs(fusion_, /*unroll=*/true);
Expand Down
13 changes: 5 additions & 8 deletions csrc/scheduler/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1187,10 +1187,7 @@ void clearMemorySpace(Fusion* fusion) {

// Returns cached after tensors of the fusion inputs if unrolled. Otherwise
// return empty vector.
std::vector<TensorView*> cacheInputs(
Fusion* fusion,
bool unroll,
bool propagate_allocation) {
std::vector<TensorView*> cacheInputs(Fusion* fusion, bool unroll) {
if (!unroll) {
return {};
}
Expand Down Expand Up @@ -1227,10 +1224,10 @@ std::vector<TensorView*> cacheInputs(
}

auto cached_tv = tv->cacheAfter(
LoadStoreOpType::Set,
CacheOp::Unspecified,
propagate_allocation,
cached_uses);
/*op_type=*/LoadStoreOpType::Set,
/*cache_op=*/CacheOp::Unspecified,
/*propagate_allocation_domain=*/true,
/*cached_uses=*/cached_uses);
cached_inputs.emplace_back(cached_tv);
}
return cached_inputs;
Expand Down
5 changes: 1 addition & 4 deletions csrc/scheduler/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,7 @@ void clearMemorySpace(Fusion* fusion);

// Returns cached after tensors of the fusion inputs if unrolled. Otherwise
// return empty vector.
std::vector<TensorView*> cacheInputs(
Fusion* fusion,
bool unroll,
bool propagate_allocation = false);
std::vector<TensorView*> cacheInputs(Fusion* fusion, bool unroll);

// Returns the pairs of <cache of each fusion output, corresponding output> for
// all outputs.
Expand Down
4 changes: 3 additions & 1 deletion tests/cpp/test_allocation_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1426,9 +1426,11 @@ TEST_F(AllocationDomainTest, InputAllocationIsSplit_Concrete) {
fusion->addInput(in);
fusion->addOutput(out);

// Ideally, loop should stay the same as logical because a fusion input comes
// from outside and isn't generated by a loop in the containing kernel (cf.
// #3479).
in->split(0, 2);
in->setAllocationDomain(in->getLoopDomain(), true);
in->setLoopDomain(in->getLogicalDomain());

FusionExecutorCache executor_cache(std::move(fusion));
auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA);
Expand Down

0 comments on commit c3c2093

Please sign in to comment.