Commit 8e8a792

[llvm] [aot] CUDA-AOT PR #2: Implemented AOTModuleLoader & AOTModuleBuilder for LLVM-CUDA backend (#5087)

* [llvm] [aot] Add LLVM-CPU AOT tests

* Refactored AOT test framework

* Fixed minor issue

* Enabled LLVM CPU-AOT for arm64 architecture

* Added aot unit tests programming guide

* [llvm] [aot] CUDA-AOT PR #2: Implemented AOT Module Loader for LLVM-CUDA backend

* Fixed typo

* Fixed minor issue

* Refactored AOT test framework

* [llvm] [aot] Add LLVM-CUDA AOT tests

* Added cuda device availability check
jim19930609 authored Jun 6, 2022
1 parent 8ab9b9f commit 8e8a792
Showing 16 changed files with 296 additions and 41 deletions.
29 changes: 2 additions & 27 deletions taichi/backends/cpu/aot_module_builder_impl.cpp
@@ -3,40 +3,15 @@
#include <algorithm>

#include "taichi/backends/cpu/codegen_cpu.h"

#include "taichi/llvm/launch_arg_info.h"

namespace taichi {
namespace lang {
namespace cpu {

void AotModuleBuilderImpl::dump(const std::string &output_dir,
const std::string &filename) const {
LlvmOfflineCacheFileWriter writer;
writer.set_data(std::move(cache_));
writer.dump(output_dir);
}

void AotModuleBuilderImpl::add_per_backend(const std::string &identifier,
Kernel *kernel) {
CodeGenLLVM::CompiledData AotModuleBuilderImpl::compile_kernel(Kernel *kernel) {
auto cgen = CodeGenCPU::make_codegen_llvm(kernel, /*ir=*/nullptr);
auto compiled = cgen->run_compilation();
LlvmOfflineCache::KernelCacheData kcache;
kcache.kernel_key = identifier;
kcache.module = compiled.llvm_module.get();
kcache.owned_module = std::move(compiled.llvm_module);
const auto &tasks = compiled.offloaded_tasks;
kcache.args = infer_launch_args(kernel);
kcache.offloaded_task_list.resize(tasks.size());
std::transform(tasks.begin(), tasks.end(), kcache.offloaded_task_list.begin(),
[](const auto &t) -> LlvmOfflineCache::OffloadedTaskCacheData {
LlvmOfflineCache::OffloadedTaskCacheData res;
res.name = t.name;
res.block_dim = t.block_dim;
res.grid_dim = t.grid_dim;
return res;
});
cache_.kernels[identifier] = std::move(kcache);
return cgen->run_compilation();
}

} // namespace cpu
12 changes: 3 additions & 9 deletions taichi/backends/cpu/aot_module_builder_impl.h
@@ -2,21 +2,15 @@

#include "taichi/aot/module_builder.h"
#include "taichi/llvm/llvm_offline_cache.h"
#include "taichi/llvm/llvm_aot_module_builder.h"

namespace taichi {
namespace lang {
namespace cpu {

class AotModuleBuilderImpl : public AotModuleBuilder {
public:
void dump(const std::string &output_dir,
const std::string &filename) const override;

protected:
void add_per_backend(const std::string &identifier, Kernel *kernel) override;

class AotModuleBuilderImpl : public LlvmAotModuleBuilder {
private:
mutable LlvmOfflineCache cache_;
CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) override;
};

} // namespace cpu
8 changes: 3 additions & 5 deletions taichi/backends/cpu/aot_module_loader_impl.cpp
@@ -15,15 +15,13 @@ class AotModuleImpl : public LlvmAotModule {
: LlvmAotModule(params.module_path, params.program) {
}

Arch arch() const override {
return Arch::x64;
}

private:
FunctionType convert_module_to_function(
const std::string &name,
LlvmOfflineCache::KernelCacheData &&loaded) override {
auto *tlctx = program_->get_llvm_context(program_->config->arch);
Arch arch = program_->config->arch;
TI_ASSERT(arch == Arch::x64 || arch == Arch::arm64);
auto *tlctx = program_->get_llvm_context(arch);

const auto &tasks = loaded.offloaded_task_list;
std::vector<OffloadedTask> offloaded_tasks;
19 changes: 19 additions & 0 deletions taichi/backends/cuda/aot_module_builder_impl.cpp
@@ -0,0 +1,19 @@
#include "taichi/backends/cuda/aot_module_builder_impl.h"

#include <algorithm>

#include "taichi/backends/cuda/codegen_cuda.h"
#include "taichi/llvm/launch_arg_info.h"

namespace taichi {
namespace lang {
namespace cuda {

CodeGenLLVM::CompiledData AotModuleBuilderImpl::compile_kernel(Kernel *kernel) {
  auto cgen = CodeGenCUDA::make_codegen_llvm(kernel, /*ir=*/nullptr);
  return cgen->run_compilation();
}

} // namespace cuda
} // namespace lang
} // namespace taichi
18 changes: 18 additions & 0 deletions taichi/backends/cuda/aot_module_builder_impl.h
@@ -0,0 +1,18 @@
#pragma once

#include "taichi/aot/module_builder.h"
#include "taichi/llvm/llvm_offline_cache.h"
#include "taichi/llvm/llvm_aot_module_builder.h"

namespace taichi {
namespace lang {
namespace cuda {

class AotModuleBuilderImpl : public LlvmAotModuleBuilder {
 private:
  CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) override;
};

} // namespace cuda
} // namespace lang
} // namespace taichi
66 changes: 66 additions & 0 deletions taichi/backends/cuda/aot_module_loader_impl.cpp
@@ -0,0 +1,66 @@
#include "taichi/backends/cuda/aot_module_loader_impl.h"
#include "taichi/llvm/llvm_aot_module_loader.h"

#include "taichi/llvm/llvm_offline_cache.h"
#include "taichi/llvm/llvm_program.h"
#include "taichi/backends/cuda/codegen_cuda.h"

namespace taichi {
namespace lang {
namespace {

class AotModuleImpl : public LlvmAotModule {
 public:
  explicit AotModuleImpl(const cuda::AotModuleParams &params)
      : LlvmAotModule(params.module_path, params.program) {
  }

 private:
  FunctionType convert_module_to_function(
      const std::string &name,
      LlvmOfflineCache::KernelCacheData &&loaded) override {
    Arch arch = program_->config->arch;
    TI_ASSERT(arch == Arch::cuda);
    auto *tlctx = program_->get_llvm_context(arch);

    const auto &tasks = loaded.offloaded_task_list;
    std::vector<OffloadedTask> offloaded_tasks;
    offloaded_tasks.reserve(tasks.size());
    for (const auto &t : tasks) {
      OffloadedTask ot{/*codegen=*/nullptr};
      ot.name = t.name;
      ot.block_dim = t.block_dim;
      ot.grid_dim = t.grid_dim;
      offloaded_tasks.push_back(std::move(ot));
    }

    CUDAModuleToFunctionConverter converter{tlctx, program_};
    return converter.convert(name, loaded.args, std::move(loaded.owned_module),
                             std::move(offloaded_tasks));
  }

  std::unique_ptr<aot::KernelTemplate> make_new_kernel_template(
      const std::string &name) override {
    TI_NOT_IMPLEMENTED;
    return nullptr;
  }

  std::unique_ptr<aot::Field> make_new_field(const std::string &name) override {
    TI_NOT_IMPLEMENTED;
    return nullptr;
  }
};

} // namespace

namespace cuda {

std::unique_ptr<aot::Module> make_aot_module(std::any mod_params) {
  auto mod = std::make_unique<AotModuleImpl>(
      std::any_cast<const AotModuleParams &>(mod_params));
  return mod;
}

} // namespace cuda
} // namespace lang
} // namespace taichi
21 changes: 21 additions & 0 deletions taichi/backends/cuda/aot_module_loader_impl.h
@@ -0,0 +1,21 @@
#pragma once

#include "taichi/aot/module_loader.h"

namespace taichi {
namespace lang {

class LlvmProgramImpl;

namespace cuda {

struct TI_DLL_EXPORT AotModuleParams {
  std::string module_path;
  LlvmProgramImpl *program{nullptr};
};

TI_DLL_EXPORT std::unique_ptr<aot::Module> make_aot_module(std::any mod_params);

} // namespace cuda
} // namespace lang
} // namespace taichi
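For orientation, here is a minimal sketch of how a caller might load and run a kernel from the exported CUDA AOT module. Only cuda::AotModuleParams and cuda::make_aot_module come from this diff; the get_kernel/launch calls, the RuntimeContext argument, and the path are assumptions about the surrounding aot::Module interface rather than part of this commit.

#include "taichi/backends/cuda/aot_module_loader_impl.h"
#include "taichi/llvm/llvm_program.h"

using namespace taichi::lang;

// Hypothetical caller: `llvm_prog` and `ctx` are assumed to be initialized elsewhere.
void load_and_run(LlvmProgramImpl *llvm_prog, RuntimeContext &ctx) {
  cuda::AotModuleParams params;
  params.module_path = "/path/to/aot_dir";  // directory previously passed to dump()
  params.program = llvm_prog;

  // make_aot_module() takes std::any so every backend can share one factory signature.
  std::unique_ptr<aot::Module> mod = cuda::make_aot_module(params);

  // Assumed aot::Module interface: look up the cached kernel by name and launch it.
  aot::Kernel *k = mod->get_kernel("run");
  k->launch(&ctx);
}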
8 changes: 8 additions & 0 deletions taichi/backends/cuda/codegen_cuda.cpp
@@ -711,6 +711,14 @@ class CodeGenLLVMCUDA : public CodeGenLLVM {
}
};

#ifdef TI_WITH_LLVM
// static
std::unique_ptr<CodeGenLLVM> CodeGenCUDA::make_codegen_llvm(Kernel *kernel,
                                                            IRNode *ir) {
  return std::make_unique<CodeGenLLVMCUDA>(kernel, ir);
}
#endif // TI_WITH_LLVM

static void set_arg_external_array(RuntimeContext *ctx,
                                   const std::string &kernel_name,
                                   int arg_id,
6 changes: 6 additions & 0 deletions taichi/backends/cuda/codegen_cuda.h
@@ -13,6 +13,12 @@ class CodeGenCUDA : public KernelCodeGen {
      : KernelCodeGen(kernel, ir) {
  }

  // TODO: Stop defining these macro guards in the headers
#ifdef TI_WITH_LLVM
  static std::unique_ptr<CodeGenLLVM> make_codegen_llvm(Kernel *kernel,
                                                        IRNode *ir);
#endif // TI_WITH_LLVM

FunctionType codegen() override;
};

38 changes: 38 additions & 0 deletions taichi/llvm/llvm_aot_module_builder.cpp
@@ -0,0 +1,38 @@
#include "taichi/llvm/llvm_aot_module_builder.h"

#include <algorithm>
#include "taichi/llvm/launch_arg_info.h"

namespace taichi {
namespace lang {

void LlvmAotModuleBuilder::dump(const std::string &output_dir,
                                const std::string &filename) const {
  LlvmOfflineCacheFileWriter writer;
  writer.set_data(std::move(cache_));
  writer.dump(output_dir);
}

void LlvmAotModuleBuilder::add_per_backend(const std::string &identifier,
                                           Kernel *kernel) {
  auto compiled = compile_kernel(kernel);
  LlvmOfflineCache::KernelCacheData kcache;
  kcache.kernel_key = identifier;
  kcache.module = compiled.llvm_module.get();
  kcache.owned_module = std::move(compiled.llvm_module);
  const auto &tasks = compiled.offloaded_tasks;
  kcache.args = infer_launch_args(kernel);
  kcache.offloaded_task_list.resize(tasks.size());
  std::transform(tasks.begin(), tasks.end(), kcache.offloaded_task_list.begin(),
                 [](const auto &t) -> LlvmOfflineCache::OffloadedTaskCacheData {
                   LlvmOfflineCache::OffloadedTaskCacheData res;
                   res.name = t.name;
                   res.block_dim = t.block_dim;
                   res.grid_dim = t.grid_dim;
                   return res;
                 });
  cache_.kernels[identifier] = std::move(kcache);
}

} // namespace lang
} // namespace taichi
24 changes: 24 additions & 0 deletions taichi/llvm/llvm_aot_module_builder.h
@@ -0,0 +1,24 @@
#pragma once

#include "taichi/aot/module_builder.h"
#include "taichi/llvm/llvm_offline_cache.h"
#include "taichi/codegen/codegen_llvm.h"

namespace taichi {
namespace lang {

class LlvmAotModuleBuilder : public AotModuleBuilder {
 public:
  void dump(const std::string &output_dir,
            const std::string &filename) const override;

 protected:
  void add_per_backend(const std::string &identifier, Kernel *kernel) override;
  virtual CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) = 0;

 private:
  mutable LlvmOfflineCache cache_;
};

} // namespace lang
} // namespace taichi
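The new base class owns the offline-cache bookkeeping (dump() and add_per_backend()), so a backend only has to supply compile_kernel(). As an illustrative sketch that is not part of this commit, a hypothetical additional LLVM backend would plug in as below, mirroring the CPU and CUDA implementations in this diff; CodeGenMyBackend is a made-up name.

// Hypothetical subclass: only the codegen entry point differs per backend.
class MyBackendAotModuleBuilderImpl : public LlvmAotModuleBuilder {
 private:
  CodeGenLLVM::CompiledData compile_kernel(Kernel *kernel) override {
    // Run this backend's LLVM codegen; the shared base class turns the result
    // into offloaded-task metadata, launch args, and the serialized cache.
    auto cgen = CodeGenMyBackend::make_codegen_llvm(kernel, /*ir=*/nullptr);
    return cgen->run_compilation();
  }
};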
4 changes: 4 additions & 0 deletions taichi/llvm/llvm_aot_module_loader.h
@@ -15,6 +15,10 @@ class LlvmAotModule : public aot::Module {
    TI_ASSERT(program_ != nullptr);
  }

  Arch arch() const override {
    return program_->config->arch;
  }

  uint64_t version() const override {
    return 0;
  }
8 changes: 8 additions & 0 deletions taichi/llvm/llvm_program.cpp
@@ -17,6 +17,7 @@
#include "taichi/backends/cuda/cuda_device.h"

#if defined(TI_WITH_CUDA)
#include "taichi/backends/cuda/aot_module_builder_impl.h"
#include "taichi/backends/cuda/cuda_driver.h"
#include "taichi/backends/cuda/codegen_cuda.h"
#include "taichi/backends/cuda/cuda_context.h"
@@ -341,6 +342,13 @@ std::unique_ptr<AotModuleBuilder> LlvmProgramImpl::make_aot_module_builder() {
  if (config->arch == Arch::x64 || config->arch == Arch::arm64) {
    return std::make_unique<cpu::AotModuleBuilderImpl>();
  }

#if defined(TI_WITH_CUDA)
  if (config->arch == Arch::cuda) {
    return std::make_unique<cuda::AotModuleBuilderImpl>();
  }
#endif

  TI_NOT_IMPLEMENTED;
  return nullptr;
}
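With the CUDA branch added to make_aot_module_builder(), the C++ side of AOT export on CUDA looks roughly like the sketch below. The add() call is assumed to be the existing public AotModuleBuilder wrapper around add_per_backend(); kernel and program construction are elided, and the path is a placeholder.

// Hypothetical export flow: `llvm_prog` and `kernel` are assumed to exist already.
std::unique_ptr<AotModuleBuilder> builder = llvm_prog->make_aot_module_builder();
builder->add("run", kernel);                    // assumed wrapper over add_per_backend()
builder->dump("/path/to/aot_dir", "cuda-aot");  // writes the LLVM offline cache files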
25 changes: 25 additions & 0 deletions tests/cpp/backends/llvm/cuda_aot.py
@@ -0,0 +1,25 @@
import os

import taichi as ti


def compile_aot():
    ti.init(arch=ti.cuda)

    @ti.kernel
    def run(base: int, arr: ti.types.ndarray()):
        for i in arr:
            arr[i] = base + i

    arr = ti.ndarray(int, shape=16)
    run(42, arr)

    assert "TAICHI_AOT_FOLDER_PATH" in os.environ.keys()
    dir_name = str(os.environ["TAICHI_AOT_FOLDER_PATH"])

    m = ti.aot.Module(ti.cuda)
    m.add_kernel(run, template_args={'arr': arr})
    m.save(dir_name, 'cuda-aot')


compile_aot()