diff --git a/.gitmodules b/.gitmodules index e9d6d0020..cf89bdae6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -47,6 +47,3 @@ [submodule "external/SPIRV-Tools"] path = external/SPIRV-Tools url = https://github.com/KhronosGroup/SPIRV-Tools -[submodule "external/SPIRV-Cross"] - path = external/SPIRV-Cross - url = https://github.com/KhronosGroup/SPIRV-Cross diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index db4f6bf3b..3d6e6d2a0 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -92,12 +92,7 @@ file(GLOB TAICHI_OPENGL_REQUIRED_SOURCE "taichi/backends/opengl/codegen_opengl.*" "taichi/backends/opengl/struct_opengl.*" ) -file(GLOB TAICHI_VULKAN_REQUIRED_SOURCE - "taichi/backends/vulkan/runtime.h" - "taichi/backends/vulkan/runtime.cpp" - "taichi/backends/vulkan/snode_struct_compiler.cpp" - "taichi/backends/vulkan/snode_struct_compiler.h" -) +file(GLOB TAICHI_VULKAN_REQUIRED_SOURCE "taichi/backends/vulkan/runtime.h" "taichi/backends/vulkan/runtime.cpp") list(REMOVE_ITEM TAICHI_CORE_SOURCE ${TAICHI_BACKEND_SOURCE}) @@ -261,10 +256,6 @@ else() message(STATUS "TI_WITH_CUDA_TOOLKIT = OFF") endif() -add_subdirectory(external/SPIRV-Cross) -target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Cross) -target_link_libraries(${CORE_LIBRARY_NAME} spirv-cross-glsl spirv-cross-core) - if (TI_WITH_VULKAN) # Vulkan libs # https://cmake.org/cmake/help/latest/module/FindVulkan.html diff --git a/examples/minimal.py b/examples/minimal.py index ce7c87a32..81e1d6c30 100644 --- a/examples/minimal.py +++ b/examples/minimal.py @@ -4,9 +4,8 @@ @ti.kernel -def p() -> ti.f32: +def p(): print(42) - return 40 + 2 -print(p()) +p() diff --git a/external/SPIRV-Cross b/external/SPIRV-Cross deleted file mode 160000 index 97a438d21..000000000 --- a/external/SPIRV-Cross +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 97a438d214b24e4958ca137a18639670648cedd0 diff --git a/taichi/backends/device.h b/taichi/backends/device.h index f9e5ff59f..e66697f94 100644 --- a/taichi/backends/device.h +++ b/taichi/backends/device.h @@ -12,27 +12,25 @@ namespace lang { // Or the backend runtime itself // Capabilities are per-device enum class DeviceCapability : uint32_t { - // Vulkan Caps vk_api_version, + vk_spirv_version, vk_has_physical_features2, + vk_has_int8, + vk_has_int16, + vk_has_int64, + vk_has_float16, + vk_has_float64, vk_has_external_memory, + vk_has_atomic_i64, + vk_has_atomic_float, // load, store, exchange + vk_has_atomic_float_add, + vk_has_atomic_float_minmax, + vk_has_atomic_float64, // load, store, exchange + vk_has_atomic_float64_add, + vk_has_atomic_float64_minmax, vk_has_surface, vk_has_presentation, - // SPIR-V Caps - spirv_version, - spirv_has_int8, - spirv_has_int16, - spirv_has_int64, - spirv_has_float16, - spirv_has_float64, - spirv_has_atomic_i64, - spirv_has_atomic_float, // load, store, exchange - spirv_has_atomic_float_add, - spirv_has_atomic_float_minmax, - spirv_has_atomic_float64, // load, store, exchange - spirv_has_atomic_float64_add, - spirv_has_atomic_float64_minmax, - spirv_has_variable_ptr, + vk_has_spv_variable_ptr, }; class Device; diff --git a/taichi/backends/vulkan/codegen_vulkan.cpp b/taichi/backends/vulkan/codegen_vulkan.cpp index 8eb6dcf55..44d7d2136 100644 --- a/taichi/backends/vulkan/codegen_vulkan.cpp +++ b/taichi/backends/vulkan/codegen_vulkan.cpp @@ -825,7 +825,7 @@ class TaskCodegen : public IRVisitor { spirv::Value data = ir_->query_value(stmt->val->raw_name()); spirv::Value val; if (dt->is_primitive(PrimitiveTypeID::f32)) { - if (device_->get_cap(DeviceCapability::spirv_has_atomic_float_add) && + if (device_->get_cap(DeviceCapability::vk_has_atomic_float_add) && stmt->op_type == AtomicOpType::add && is_compiled_struct) { val = ir_->make_value( spv::OpAtomicFAddEXT, ir_->get_primitive_type(dt), addr_ptr, @@ -1024,7 +1024,7 @@ class TaskCodegen : public IRVisitor { task_attribs_.advisory_num_threads_per_group, 1, 1}; ir_->set_work_group_size(group_size); std::vector buffers; - if (device_->get_cap(DeviceCapability::spirv_version) > 0x10300) { + if (device_->get_cap(DeviceCapability::vk_spirv_version) > 0x10300) { for (const auto &bb : task_attribs_.buffer_binds) { const auto it = buffer_value_map_.find(bb.buffer); if (it != buffer_value_map_.end()) { @@ -1140,23 +1140,12 @@ class TaskCodegen : public IRVisitor { // For now, |total_invocs_name| is equal to |total_elems|. Once we support // dynamic range, they will be different. // https://www.khronos.org/opengl/wiki/Compute_Shader#Inputs - - // HLSL & WGSL cross compilers do not support this builtin - /* spirv::Value total_invocs = ir_->cast( ir_->i32_type(), ir_->mul(ir_->get_num_work_groups(0), ir_->uint_immediate_number( ir_->u32_type(), task_attribs_.advisory_num_threads_per_group, true))); - */ - const int group_x = (task_attribs_.advisory_total_num_threads + - task_attribs_.advisory_num_threads_per_group - 1) / - task_attribs_.advisory_num_threads_per_group; - spirv::Value total_invocs = ir_->uint_immediate_number( - ir_->i32_type(), group_x * task_attribs_.advisory_num_threads_per_group, - false); - ir_->debug(spv::OpName, total_invocs, total_invocs_name); // Must get init label after making value(to make sure they are correct) @@ -1390,15 +1379,12 @@ class KernelCodegen { // Enable to dump SPIR-V assembly of kernels #if 0 - std::string spirv_asm; - spirv_tools_->Disassemble(optimized_spv, &spirv_asm); - TI_WARN("SPIR-V Assembly dump for {} :\n{}\n\n", params_.ti_kernel_name, - spirv_asm); - - std::ofstream fout((params_.ti_kernel_name).c_str(), - std::ios::binary | std::ios::out); - fout.write(reinterpret_cast(task_res.spirv_code.data()), - task_res.spirv_code.size() * sizeof(uint32_t)); + std::string spirv_asm; + spirv_tools_->Disassemble(optimized_spv, &spirv_asm); + TI_WARN("SPIR-V Assembly dump for {} :\n{}\n\n",params_.ti_kernel_name, spirv_asm); + + std::ofstream fout((params_.ti_kernel_name).c_str(), std::ios::binary | std::ios::out); + fout.write(reinterpret_cast(task_res.spirv_code.data()), task_res.spirv_code.size() * sizeof(uint32_t)); fout.close(); #endif diff --git a/taichi/backends/vulkan/embedded_device.cpp b/taichi/backends/vulkan/embedded_device.cpp index 689ce45b8..e418581fb 100644 --- a/taichi/backends/vulkan/embedded_device.cpp +++ b/taichi/backends/vulkan/embedded_device.cpp @@ -187,6 +187,8 @@ EmbeddedVulkanDevice::EmbeddedVulkanDevice( pick_physical_device(); create_logical_device(); + // TODO: Change the ownership hierarchy, the taichi Device class should be at + // the top level { VulkanDevice::Params params; params.instance = instance_; @@ -215,6 +217,10 @@ EmbeddedVulkanDevice::~EmbeddedVulkanDevice() { vkDestroyInstance(instance_, kNoVkAllocCallbacks); } +Device *EmbeddedVulkanDevice::get_ti_device() const { + return ti_device_.get(); +} + void EmbeddedVulkanDevice::create_instance() { VkApplicationInfo app_info{}; app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; @@ -375,10 +381,10 @@ void EmbeddedVulkanDevice::create_logical_device() { vkGetPhysicalDeviceProperties(physical_device_, &physical_device_properties); ti_device_->set_cap(DeviceCapability::vk_api_version, physical_device_properties.apiVersion); - ti_device_->set_cap(DeviceCapability::spirv_version, 0x10000); + ti_device_->set_cap(DeviceCapability::vk_spirv_version, 0x10000); if (physical_device_properties.apiVersion >= VK_API_VERSION_1_1) { - ti_device_->set_cap(DeviceCapability::spirv_version, 0x10300); + ti_device_->set_cap(DeviceCapability::vk_spirv_version, 0x10300); } // Detect extensions @@ -421,7 +427,7 @@ void EmbeddedVulkanDevice::create_logical_device() { } else if (name == VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_SPIRV_1_4_EXTENSION_NAME) { - ti_device_->set_cap(DeviceCapability::spirv_version, 0x10400); + ti_device_->set_cap(DeviceCapability::vk_spirv_version, 0x10400); enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME) { ti_device_->set_cap(DeviceCapability::vk_has_external_memory, true); @@ -448,15 +454,15 @@ void EmbeddedVulkanDevice::create_logical_device() { if (device_supported_features.shaderInt16) { device_features.shaderInt16 = true; - ti_device_->set_cap(DeviceCapability::spirv_has_int16, true); + ti_device_->set_cap(DeviceCapability::vk_has_int16, true); } if (device_supported_features.shaderInt64) { device_features.shaderInt64 = true; - ti_device_->set_cap(DeviceCapability::spirv_has_int64, true); + ti_device_->set_cap(DeviceCapability::vk_has_int64, true); } if (device_supported_features.shaderFloat64) { device_features.shaderFloat64 = true; - ti_device_->set_cap(DeviceCapability::spirv_has_float64, true); + ti_device_->set_cap(DeviceCapability::vk_has_float64, true); } if (device_supported_features.wideLines) { device_features.wideLines = true; @@ -493,7 +499,7 @@ void EmbeddedVulkanDevice::create_logical_device() { if (variable_ptr_feature.variablePointers && variable_ptr_feature.variablePointersStorageBuffer) { - ti_device_->set_cap(DeviceCapability::spirv_has_variable_ptr, true); + ti_device_->set_cap(DeviceCapability::vk_has_spv_variable_ptr, true); } *pNextEnd = &variable_ptr_feature; pNextEnd = &variable_ptr_feature.pNext; @@ -505,14 +511,13 @@ void EmbeddedVulkanDevice::create_logical_device() { vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); if (shader_atomic_float_feature.shaderBufferFloat32AtomicAdd) { - ti_device_->set_cap(DeviceCapability::spirv_has_atomic_float_add, true); + ti_device_->set_cap(DeviceCapability::vk_has_atomic_float_add, true); } else if (shader_atomic_float_feature.shaderBufferFloat64AtomicAdd) { - ti_device_->set_cap(DeviceCapability::spirv_has_atomic_float64_add, - true); + ti_device_->set_cap(DeviceCapability::vk_has_atomic_float64_add, true); } else if (shader_atomic_float_feature.shaderBufferFloat32Atomics) { - ti_device_->set_cap(DeviceCapability::spirv_has_atomic_float, true); + ti_device_->set_cap(DeviceCapability::vk_has_atomic_float, true); } else if (shader_atomic_float_feature.shaderBufferFloat64Atomics) { - ti_device_->set_cap(DeviceCapability::spirv_has_atomic_float64, true); + ti_device_->set_cap(DeviceCapability::vk_has_atomic_float64, true); } *pNextEnd = &shader_atomic_float_feature; pNextEnd = &shader_atomic_float_feature.pNext; @@ -524,9 +529,9 @@ void EmbeddedVulkanDevice::create_logical_device() { vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); if (shader_f16_i8_feature.shaderFloat16) { - ti_device_->set_cap(DeviceCapability::spirv_has_float16, true); + ti_device_->set_cap(DeviceCapability::vk_has_float16, true); } else if (shader_f16_i8_feature.shaderInt8) { - ti_device_->set_cap(DeviceCapability::spirv_has_int8, true); + ti_device_->set_cap(DeviceCapability::vk_has_int8, true); } *pNextEnd = &shader_f16_i8_feature; pNextEnd = &shader_f16_i8_feature.pNext; @@ -549,6 +554,8 @@ void EmbeddedVulkanDevice::create_logical_device() { if (params_.is_for_ui) { vkGetDeviceQueue(device_, queue_family_indices_.graphics_family.value(), 0, &graphics_queue_); + vkGetDeviceQueue(device_, queue_family_indices_.graphics_family.value(), 0, + &present_queue_); } vkGetDeviceQueue(device_, queue_family_indices_.compute_family.value(), 0, diff --git a/taichi/backends/vulkan/embedded_device.h b/taichi/backends/vulkan/embedded_device.h index 1be3f7e11..4b70bac35 100644 --- a/taichi/backends/vulkan/embedded_device.h +++ b/taichi/backends/vulkan/embedded_device.h @@ -61,14 +61,36 @@ class EmbeddedVulkanDevice { explicit EmbeddedVulkanDevice(const Params ¶ms); ~EmbeddedVulkanDevice(); - const VulkanDevice *device() const { - return ti_device_.get(); + VkInstance instance() { + return instance_; } VulkanDevice *device() { return ti_device_.get(); } + const VulkanDevice *device() const { + return ti_device_.get(); + } + + VkPhysicalDevice physical_device() const { + return physical_device_; + } + + VkSurfaceKHR surface() const { + return surface_; + } + + VkInstance instance() const { + return instance_; + } + + const VulkanQueueFamilyIndices &queue_family_indices() const { + return queue_family_indices_; + } + + Device *get_ti_device() const; + private: void create_instance(); void setup_debug_messenger(); @@ -81,12 +103,19 @@ class EmbeddedVulkanDevice { VkPhysicalDevice physical_device_{VK_NULL_HANDLE}; VulkanQueueFamilyIndices queue_family_indices_; VkDevice device_{VK_NULL_HANDLE}; - + // TODO: It's probably not right to put these per-queue things here. However, + // in Taichi we only use a single queue on a single device (i.e. a single CUDA + // stream), so it doesn't make a difference. VkQueue compute_queue_{VK_NULL_HANDLE}; VkQueue graphics_queue_{VK_NULL_HANDLE}; + VkQueue present_queue_{VK_NULL_HANDLE}; VkSurfaceKHR surface_{VK_NULL_HANDLE}; + // TODO: Shall we have dedicated command pools for COMPUTE and TRANSFER + // commands, respectively? + VkCommandPool command_pool_{VK_NULL_HANDLE}; + std::unique_ptr ti_device_{nullptr}; Params params_; diff --git a/taichi/backends/vulkan/loader.cpp b/taichi/backends/vulkan/loader.cpp index d71a5f069..2e4eaae3b 100644 --- a/taichi/backends/vulkan/loader.cpp +++ b/taichi/backends/vulkan/loader.cpp @@ -40,10 +40,6 @@ PFN_vkVoidFunction VulkanLoader::load_function(const char *name) { return result; } -bool is_vulkan_api_available() { - return VulkanLoader::instance().init(); -} - } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/loader.h b/taichi/backends/vulkan/loader.h index 220a7411e..4fbda6bb6 100644 --- a/taichi/backends/vulkan/loader.h +++ b/taichi/backends/vulkan/loader.h @@ -35,8 +35,6 @@ class VulkanLoader { VkDevice vulkan_device_{VK_NULL_HANDLE}; }; -bool is_vulkan_api_available(); - } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/runtime.cpp b/taichi/backends/vulkan/runtime.cpp index b942c05e6..ae6d5d6ab 100644 --- a/taichi/backends/vulkan/runtime.cpp +++ b/taichi/backends/vulkan/runtime.cpp @@ -9,6 +9,18 @@ #include #include +#include "taichi/util/environ_config.h" + +#ifdef TI_WITH_VULKAN +#include "taichi/backends/vulkan/embedded_device.h" +#include "taichi/backends/vulkan/vulkan_utils.h" +#include "taichi/backends/vulkan/loader.h" + +#include "vk_mem_alloc.h" +#include "taichi/backends/vulkan/vulkan_device.h" +#endif // TI_WITH_VULKAN + +#include "taichi/math/arithmetic.h" #define TI_RUNTIME_HOST #include "taichi/program/context.h" #undef TI_RUNTIME_HOST @@ -17,6 +29,8 @@ namespace taichi { namespace lang { namespace vulkan { +#ifdef TI_WITH_VULKAN + namespace { class StopWatch { public: @@ -38,6 +52,16 @@ class StopWatch { std::chrono::time_point begin_; }; +using BufferType = TaskAttributes::BufferType; +using BufferInfo = TaskAttributes::BufferInfo; +using BufferBind = TaskAttributes::BufferBind; +using BufferInfoHasher = TaskAttributes::BufferInfoHasher; + +// TODO: In the future this isn't necessarily a pointer, since DeviceAllocation +// is already a pretty cheap handle> +using InputBuffersMap = + std::unordered_map; + class HostDeviceContextBlitter { public: HostDeviceContextBlitter(const KernelContextAttributes *ctx_attribs, @@ -79,22 +103,22 @@ class HostDeviceContextBlitter { std::memcpy(device_ptr, host_ptr, arg.stride); break; } - if (device_->get_cap(DeviceCapability::spirv_has_int8)) { + if (device_->get_cap(DeviceCapability::vk_has_int8)) { TO_DEVICE(i8, int8) TO_DEVICE(u8, uint8) } - if (device_->get_cap(DeviceCapability::spirv_has_int16)) { + if (device_->get_cap(DeviceCapability::vk_has_int16)) { TO_DEVICE(i16, int16) TO_DEVICE(u16, uint16) } TO_DEVICE(i32, int32) TO_DEVICE(u32, uint32) TO_DEVICE(f32, float32) - if (device_->get_cap(DeviceCapability::spirv_has_int64)) { + if (device_->get_cap(DeviceCapability::vk_has_int64)) { TO_DEVICE(i64, int64) TO_DEVICE(u64, uint64) } - if (device_->get_cap(DeviceCapability::spirv_has_float64)) { + if (device_->get_cap(DeviceCapability::vk_has_float64)) { TO_DEVICE(f64, float64) } TI_ERROR("Vulkan does not support arg type={}", data_type_name(arg.dt)); @@ -161,22 +185,22 @@ class HostDeviceContextBlitter { std::memcpy(host_ptr, device_ptr, ret.stride); break; } - if (device_->get_cap(DeviceCapability::spirv_has_int8)) { + if (device_->get_cap(DeviceCapability::vk_has_int8)) { TO_HOST(i8, int8) TO_HOST(u8, uint8) } - if (device_->get_cap(DeviceCapability::spirv_has_int16)) { + if (device_->get_cap(DeviceCapability::vk_has_int16)) { TO_HOST(i16, int16) TO_HOST(u16, uint16) } TO_HOST(i32, int32) TO_HOST(u32, uint32) TO_HOST(f32, float32) - if (device_->get_cap(DeviceCapability::spirv_has_int64)) { + if (device_->get_cap(DeviceCapability::vk_has_int64)) { TO_HOST(i64, int64) TO_HOST(u64, uint64) } - if (device_->get_cap(DeviceCapability::spirv_has_float64)) { + if (device_->get_cap(DeviceCapability::vk_has_float64)) { TO_HOST(f64, float64) } TI_ERROR("Vulkan does not support return value type={}", @@ -212,225 +236,358 @@ class HostDeviceContextBlitter { Device *const device_; }; -} // namespace - // Info for launching a compiled Taichi kernel, which consists of a series of // Vulkan pipelines. +class CompiledTaichiKernel { + public: + struct Params { + const TaichiKernelAttributes *ti_kernel_attribs{nullptr}; + std::vector> spirv_bins; + std::vector compiled_structs; + + VulkanDevice *device{nullptr}; + std::vector root_buffers; + DeviceAllocation *global_tmps_buffer{nullptr}; + }; + + CompiledTaichiKernel(const Params &ti_params) + : ti_kernel_attribs_(*ti_params.ti_kernel_attribs), + device_(ti_params.device) { + input_buffers_[BufferType::GlobalTmps] = ti_params.global_tmps_buffer; + for (int root = 0; root < ti_params.compiled_structs.size(); ++root) { + BufferInfo buffer = {BufferType::Root, root}; + input_buffers_[buffer] = ti_params.root_buffers[root]; + } + const auto ctx_sz = ti_kernel_attribs_.ctx_attribs.total_bytes(); + if (!ti_kernel_attribs_.ctx_attribs.empty()) { + Device::AllocParams params; + ctx_buffer_ = ti_params.device->allocate_memory_unique( + {size_t(ctx_sz), + /*host_write=*/true, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); + ctx_buffer_host_ = ti_params.device->allocate_memory_unique( + {size_t(ctx_sz), + /*host_write=*/false, /*host_read=*/true, + /*export_sharing=*/false, AllocUsage::Storage}); + input_buffers_[BufferType::Context] = ctx_buffer_.get(); + } -CompiledTaichiKernel::CompiledTaichiKernel(const Params &ti_params) - : ti_kernel_attribs_(*ti_params.ti_kernel_attribs), - device_(ti_params.device) { - input_buffers_[BufferType::GlobalTmps] = ti_params.global_tmps_buffer; - for (int root = 0; root < ti_params.compiled_structs.size(); ++root) { - BufferInfo buffer = {BufferType::Root, root}; - input_buffers_[buffer] = ti_params.root_buffers[root]; - } - const auto ctx_sz = ti_kernel_attribs_.ctx_attribs.total_bytes(); - if (!ti_kernel_attribs_.ctx_attribs.empty()) { - Device::AllocParams params; - ctx_buffer_ = ti_params.device->allocate_memory_unique( - {size_t(ctx_sz), - /*host_write=*/true, /*host_read=*/false, - /*export_sharing=*/false, AllocUsage::Storage}); - ctx_buffer_host_ = ti_params.device->allocate_memory_unique( - {size_t(ctx_sz), - /*host_write=*/false, /*host_read=*/true, - /*export_sharing=*/false, AllocUsage::Storage}); - input_buffers_[BufferType::Context] = ctx_buffer_.get(); + const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; + const auto &spirv_bins = ti_params.spirv_bins; + TI_ASSERT(task_attribs.size() == spirv_bins.size()); + + for (int i = 0; i < task_attribs.size(); ++i) { + PipelineSourceDesc source_desc{PipelineSourceType::spirv_binary, + (void *)spirv_bins[i].data(), + spirv_bins[i].size() * sizeof(uint32_t)}; + auto vp = ti_params.device->create_pipeline(source_desc, + ti_kernel_attribs_.name); + pipelines_.push_back(std::move(vp)); + } } - const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; - const auto &spirv_bins = ti_params.spirv_bins; - TI_ASSERT(task_attribs.size() == spirv_bins.size()); + const TaichiKernelAttributes &ti_kernel_attribs() const { + return ti_kernel_attribs_; + } - for (int i = 0; i < task_attribs.size(); ++i) { - PipelineSourceDesc source_desc{PipelineSourceType::spirv_binary, - (void *)spirv_bins[i].data(), - spirv_bins[i].size() * sizeof(uint32_t)}; - auto vp = - ti_params.device->create_pipeline(source_desc, ti_kernel_attribs_.name); - pipelines_.push_back(std::move(vp)); + size_t num_pipelines() const { + return pipelines_.size(); } -} -const TaichiKernelAttributes &CompiledTaichiKernel::ti_kernel_attribs() const { - return ti_kernel_attribs_; -} + DeviceAllocation *ctx_buffer() const { + return ctx_buffer_.get(); + } -size_t CompiledTaichiKernel::num_pipelines() const { - return pipelines_.size(); -} + DeviceAllocation *ctx_buffer_host() const { + return ctx_buffer_host_.get(); + } -DeviceAllocation *CompiledTaichiKernel::ctx_buffer() const { - return ctx_buffer_.get(); -} + void command_list(CommandList *cmdlist) const { + const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; + + for (int i = 0; i < task_attribs.size(); ++i) { + const auto &attribs = task_attribs[i]; + auto vp = pipelines_[i].get(); + const int group_x = (attribs.advisory_total_num_threads + + attribs.advisory_num_threads_per_group - 1) / + attribs.advisory_num_threads_per_group; + ResourceBinder *binder = vp->resource_binder(); + for (auto &bind : attribs.buffer_binds) { + binder->rw_buffer(0, bind.binding, *input_buffers_.at(bind.buffer)); + } -DeviceAllocation *CompiledTaichiKernel::ctx_buffer_host() const { - return ctx_buffer_host_.get(); -} + cmdlist->bind_pipeline(vp); + cmdlist->bind_resources(binder); + cmdlist->dispatch(group_x); + cmdlist->memory_barrier(); + } -void CompiledTaichiKernel::command_list(CommandList *cmdlist) const { - const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; - - for (int i = 0; i < task_attribs.size(); ++i) { - const auto &attribs = task_attribs[i]; - auto vp = pipelines_[i].get(); - const int group_x = (attribs.advisory_total_num_threads + - attribs.advisory_num_threads_per_group - 1) / - attribs.advisory_num_threads_per_group; - ResourceBinder *binder = vp->resource_binder(); - for (auto &bind : attribs.buffer_binds) { - binder->rw_buffer(0, bind.binding, *input_buffers_.at(bind.buffer)); + const auto ctx_sz = ti_kernel_attribs_.ctx_attribs.total_bytes(); + if (!ti_kernel_attribs_.ctx_attribs.empty()) { + cmdlist->buffer_copy(ctx_buffer_host_->get_ptr(0), + ctx_buffer_->get_ptr(0), ctx_sz); + cmdlist->buffer_barrier(*ctx_buffer_host_); } + } + + private: + TaichiKernelAttributes ti_kernel_attribs_; + std::vector tasks_attribs_; + + Device *device_; + + InputBuffersMap input_buffers_; - cmdlist->bind_pipeline(vp); - cmdlist->bind_resources(binder); - cmdlist->dispatch(group_x); - cmdlist->memory_barrier(); + // Right now |ctx_buffer_| is allocated from a HOST_VISIBLE|COHERENT + // memory, because we do not do computation on this buffer anyway, and it may + // not worth the effort doing another hop via a staging buffer. + // TODO: Provide an option to use staging buffer. This could be useful if the + // kernel does lots of IO on the context buffer, e.g., copy a large np array. + std::unique_ptr ctx_buffer_{nullptr}; + std::unique_ptr ctx_buffer_host_{nullptr}; + std::vector> pipelines_; +}; + +} // namespace + +class VkRuntime ::Impl { + public: + explicit Impl(const Params ¶ms) + : host_result_buffer_(params.host_result_buffer) { + TI_ASSERT(host_result_buffer_ != nullptr); + EmbeddedVulkanDevice::Params evd_params; + evd_params.api_version = VulkanEnvSettings::kApiVersion(); + embedded_device_ = std::make_unique(evd_params); + device_ = embedded_device_->get_ti_device(); + + init_buffers(); } - const auto ctx_sz = ti_kernel_attribs_.ctx_attribs.total_bytes(); - if (!ti_kernel_attribs_.ctx_attribs.empty()) { - cmdlist->buffer_copy(ctx_buffer_host_->get_ptr(0), ctx_buffer_->get_ptr(0), - ctx_sz); - cmdlist->buffer_barrier(*ctx_buffer_host_); + ~Impl() { + { + decltype(ti_kernels_) tmp; + tmp.swap(ti_kernels_); + } + global_tmps_buffer_.reset(); } -} -VkRuntime::VkRuntime(const Params ¶ms) - : host_result_buffer_(params.host_result_buffer), device_(params.device) { - TI_ASSERT(host_result_buffer_ != nullptr); - init_buffers(); -} + void materialize_snode_tree(SNodeTree *tree) { + auto *const root = tree->root(); + CompiledSNodeStructs compiled_structs = + vulkan::compile_snode_structs(*root); + add_root_buffer(compiled_structs.root_size); + compiled_snode_structs_.push_back(compiled_structs); + } -VkRuntime::~VkRuntime() { - { - decltype(ti_kernels_) tmp; - tmp.swap(ti_kernels_); + void destroy_snode_tree(SNodeTree *snode_tree) { + int root_id = -1; + for (int i = 0; i < compiled_snode_structs_.size(); ++i) { + if (compiled_snode_structs_[i].root == snode_tree->root()) { + root_id = i; + } + } + if (root_id == -1) { + TI_ERROR("the tree to be destroyed cannot be found"); + } + root_buffers_[root_id].reset(); } - global_tmps_buffer_.reset(); -} -void VkRuntime::materialize_snode_tree(SNodeTree *tree) { - auto *const root = tree->root(); - CompiledSNodeStructs compiled_structs = vulkan::compile_snode_structs(*root); - add_root_buffer(compiled_structs.root_size); - compiled_snode_structs_.push_back(compiled_structs); -} + const std::vector &get_compiled_structs() const { + return compiled_snode_structs_; + } -void VkRuntime::destroy_snode_tree(SNodeTree *snode_tree) { - int root_id = -1; - for (int i = 0; i < compiled_snode_structs_.size(); ++i) { - if (compiled_snode_structs_[i].root == snode_tree->root()) { - root_id = i; + KernelHandle register_taichi_kernel(RegisterParams reg_params) { + CompiledTaichiKernel::Params params; + params.ti_kernel_attribs = &(reg_params.kernel_attribs); + params.compiled_structs = get_compiled_structs(); + params.device = embedded_device_->device(); + params.root_buffers = {}; + for (int root = 0; root < root_buffers_.size(); ++root) { + params.root_buffers.push_back(root_buffers_[root].get()); + } + params.global_tmps_buffer = global_tmps_buffer_.get(); + + for (int i = 0; i < reg_params.task_spirv_source_codes.size(); ++i) { + const auto &attribs = reg_params.kernel_attribs.tasks_attribs[i]; + const auto &spirv_src = reg_params.task_spirv_source_codes[i]; + const auto &task_name = attribs.name; + + // If we can reach here, we have succeeded. Otherwise + // std::optional::value() would have killed us. + params.spirv_bins.push_back(std::move(spirv_src)); + } + KernelHandle res; + res.id_ = ti_kernels_.size(); + ti_kernels_.push_back(std::make_unique(params)); + return res; + } + + void launch_kernel(KernelHandle handle, Context *host_ctx) { + auto *ti_kernel = ti_kernels_[handle.id_].get(); + auto ctx_blitter = HostDeviceContextBlitter::maybe_make( + &ti_kernel->ti_kernel_attribs().ctx_attribs, host_ctx, device_, + host_result_buffer_, ti_kernel->ctx_buffer(), + ti_kernel->ctx_buffer_host()); + if (ctx_blitter) { + TI_ASSERT(ti_kernel->ctx_buffer() != nullptr); + ctx_blitter->host_to_device(); + } + + if (!current_cmdlist_) { + current_cmdlist_ = device_->get_compute_stream()->new_command_list(); + } + + ti_kernel->command_list(current_cmdlist_.get()); + + if (ctx_blitter) { + device_->get_compute_stream()->submit(current_cmdlist_.get()); + ctx_blitter->device_to_host(); + + current_cmdlist_ = nullptr; } } - if (root_id == -1) { - TI_ERROR("the tree to be destroyed cannot be found"); + + void synchronize() { + device_->get_compute_stream()->command_sync(); } - root_buffers_[root_id].reset(); -} -const std::vector &VkRuntime::get_compiled_structs() - const { - return compiled_snode_structs_; -} + Device *get_ti_device() const { + return device_; + } -VkRuntime::KernelHandle VkRuntime::register_taichi_kernel( - VkRuntime::RegisterParams reg_params) { - CompiledTaichiKernel::Params params; - params.ti_kernel_attribs = &(reg_params.kernel_attribs); - params.compiled_structs = get_compiled_structs(); - params.device = device_; - params.root_buffers = {}; - for (int root = 0; root < root_buffers_.size(); ++root) { - params.root_buffers.push_back(root_buffers_[root].get()); - } - params.global_tmps_buffer = global_tmps_buffer_.get(); - - for (int i = 0; i < reg_params.task_spirv_source_codes.size(); ++i) { - const auto &attribs = reg_params.kernel_attribs.tasks_attribs[i]; - const auto &spirv_src = reg_params.task_spirv_source_codes[i]; - const auto &task_name = attribs.name; - - // If we can reach here, we have succeeded. Otherwise - // std::optional::value() would have killed us. - params.spirv_bins.push_back(std::move(spirv_src)); - } - KernelHandle res; - res.id_ = ti_kernels_.size(); - ti_kernels_.push_back(std::make_unique(params)); - return res; -} + private: + void init_buffers() { + size_t gtmp_buffer_size = 1024 * 1024; -void VkRuntime::launch_kernel(KernelHandle handle, Context *host_ctx) { - auto *ti_kernel = ti_kernels_[handle.id_].get(); - auto ctx_blitter = HostDeviceContextBlitter::maybe_make( - &ti_kernel->ti_kernel_attribs().ctx_attribs, host_ctx, device_, - host_result_buffer_, ti_kernel->ctx_buffer(), - ti_kernel->ctx_buffer_host()); - if (ctx_blitter) { - TI_ASSERT(ti_kernel->ctx_buffer() != nullptr); - ctx_blitter->host_to_device(); + global_tmps_buffer_ = device_->allocate_memory_unique( + {gtmp_buffer_size, + /*host_write=*/false, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); + + // Need to zero fill the buffers, otherwise there could be NaN. + Stream *stream = device_->get_compute_stream(); + auto cmdlist = stream->new_command_list(); + + cmdlist->buffer_fill(global_tmps_buffer_->get_ptr(0), gtmp_buffer_size, + /*data=*/0); + stream->submit_synced(cmdlist.get()); } - if (!current_cmdlist_) { - current_cmdlist_ = device_->get_compute_stream()->new_command_list(); + void add_root_buffer(size_t root_buffer_size) { + if (root_buffer_size == 0) { + root_buffer_size = 4; // there might be empty roots + } + std::unique_ptr new_buffer = + device_->allocate_memory_unique( + {root_buffer_size, + /*host_write=*/false, /*host_read=*/false, + /*export_sharing=*/false, AllocUsage::Storage}); + Stream *stream = device_->get_compute_stream(); + auto cmdlist = stream->new_command_list(); + cmdlist->buffer_fill(new_buffer->get_ptr(0), root_buffer_size, /*data=*/0); + stream->submit_synced(cmdlist.get()); + root_buffers_.push_back(std::move(new_buffer)); } - ti_kernel->command_list(current_cmdlist_.get()); + uint64_t *const host_result_buffer_; - if (ctx_blitter) { - device_->get_compute_stream()->submit(current_cmdlist_.get()); - ctx_blitter->device_to_host(); + std::unique_ptr embedded_device_{nullptr}; - current_cmdlist_ = nullptr; + std::vector> root_buffers_; + std::unique_ptr global_tmps_buffer_; + + Device *device_; + + std::unique_ptr current_cmdlist_{nullptr}; + + std::vector> ti_kernels_; + + std::vector compiled_snode_structs_; +}; + +#else + +class VkRuntime::Impl { + public: + Impl(const Params &) { + TI_ERROR("Vulkan disabled"); } -} -void VkRuntime::synchronize() { - if (current_cmdlist_) { - device_->get_compute_stream()->submit(current_cmdlist_.get()); - current_cmdlist_ = nullptr; + KernelHandle register_taichi_kernel(RegisterParams) { + TI_ERROR("Vulkan disabled"); + return KernelHandle(); + } + + void launch_kernel(KernelHandle, Context *) { + TI_ERROR("Vulkan disabled"); } - device_->get_compute_stream()->command_sync(); + + void synchronize() { + TI_ERROR("Vulkan disabled"); + } + + void materialize_snode_tree(SNodeTree *tree) { + TI_ERROR("Vulkan disabled"); + } + + const std::vector &get_compiled_structs() const { + TI_ERROR("Vulkan disabled"); + } + + void destroy_snode_tree(SNodeTree *snode_tree) { + TI_ERROR("Vulkan disabled"); + } +}; + +#endif // TI_WITH_VULKAN + +VkRuntime::VkRuntime(const Params ¶ms) + : impl_(std::make_unique(params)) { } -Device *VkRuntime::get_ti_device() const { - return device_; +VkRuntime::~VkRuntime() { +} + +VkRuntime::KernelHandle VkRuntime::register_taichi_kernel( + RegisterParams params) { + return impl_->register_taichi_kernel(std::move(params)); +} + +void VkRuntime::launch_kernel(KernelHandle handle, Context *host_ctx) { + impl_->launch_kernel(handle, host_ctx); } -void VkRuntime::init_buffers() { - size_t gtmp_buffer_size = 1024 * 1024; +void VkRuntime::synchronize() { + impl_->synchronize(); +} - global_tmps_buffer_ = device_->allocate_memory_unique( - {gtmp_buffer_size, - /*host_write=*/false, /*host_read=*/false, - /*export_sharing=*/false, AllocUsage::Storage}); +void VkRuntime::materialize_snode_tree(SNodeTree *tree) { + impl_->materialize_snode_tree(tree); +} - // Need to zero fill the buffers, otherwise there could be NaN. - Stream *stream = device_->get_compute_stream(); - auto cmdlist = stream->new_command_list(); +const std::vector &VkRuntime::get_compiled_structs() + const { + return impl_->get_compiled_structs(); +} - cmdlist->buffer_fill(global_tmps_buffer_->get_ptr(0), gtmp_buffer_size, - /*data=*/0); - stream->submit_synced(cmdlist.get()); +void VkRuntime::destroy_snode_tree(SNodeTree *snode_tree) { + return impl_->destroy_snode_tree(snode_tree); } -void VkRuntime::add_root_buffer(size_t root_buffer_size) { - if (root_buffer_size == 0) { - root_buffer_size = 4; // there might be empty roots - } - std::unique_ptr new_buffer = - device_->allocate_memory_unique( - {root_buffer_size, - /*host_write=*/false, /*host_read=*/false, - /*export_sharing=*/false, AllocUsage::Storage}); - Stream *stream = device_->get_compute_stream(); - auto cmdlist = stream->new_command_list(); - cmdlist->buffer_fill(new_buffer->get_ptr(0), root_buffer_size, /*data=*/0); - stream->submit_synced(cmdlist.get()); - root_buffers_.push_back(std::move(new_buffer)); +Device *VkRuntime::get_ti_device() const { +#ifdef TI_WITH_VULKAN + return impl_->get_ti_device(); +#else + return nullptr; +#endif +} + +bool is_vulkan_api_available() { +#ifdef TI_WITH_VULKAN + return VulkanLoader::instance().init(); +#else + return false; +#endif } } // namespace vulkan diff --git a/taichi/backends/vulkan/runtime.h b/taichi/backends/vulkan/runtime.h index d1389277a..76ce8144a 100644 --- a/taichi/backends/vulkan/runtime.h +++ b/taichi/backends/vulkan/runtime.h @@ -14,58 +14,13 @@ namespace taichi { namespace lang { namespace vulkan { -using BufferType = TaskAttributes::BufferType; -using BufferInfo = TaskAttributes::BufferInfo; -using BufferBind = TaskAttributes::BufferBind; -using BufferInfoHasher = TaskAttributes::BufferInfoHasher; - -// TODO: In the future this isn't necessarily a pointer, since DeviceAllocation -// is already a pretty cheap handle> -using InputBuffersMap = - std::unordered_map; - -class CompiledTaichiKernel { - public: - struct Params { - const TaichiKernelAttributes *ti_kernel_attribs{nullptr}; - std::vector> spirv_bins; - std::vector compiled_structs; - - Device *device{nullptr}; - std::vector root_buffers; - DeviceAllocation *global_tmps_buffer{nullptr}; - }; - - CompiledTaichiKernel(const Params &ti_params); - - const TaichiKernelAttributes &ti_kernel_attribs() const; - - size_t num_pipelines() const; - - DeviceAllocation *ctx_buffer() const; - - DeviceAllocation *ctx_buffer_host() const; - - void command_list(CommandList *cmdlist) const; - +class VkRuntime { private: - TaichiKernelAttributes ti_kernel_attribs_; - std::vector tasks_attribs_; - - Device *device_; + class Impl; - InputBuffersMap input_buffers_; - - std::unique_ptr ctx_buffer_{nullptr}; - std::unique_ptr ctx_buffer_host_{nullptr}; - std::vector> pipelines_; -}; - -class VkRuntime { public: struct Params { - uint64_t *host_result_buffer{nullptr}; - Device *device{nullptr}; + uint64_t *host_result_buffer = nullptr; }; explicit VkRuntime(const Params ¶ms); @@ -74,7 +29,7 @@ class VkRuntime { class KernelHandle { private: - friend class VkRuntime; + friend class Impl; int id_ = -1; }; @@ -98,23 +53,11 @@ class VkRuntime { const std::vector &get_compiled_structs() const; private: - void init_buffers(); - void add_root_buffer(size_t root_buffer_size); - - Device *device_; - - uint64_t *const host_result_buffer_; - - std::vector> root_buffers_; - std::unique_ptr global_tmps_buffer_; - - std::unique_ptr current_cmdlist_{nullptr}; - - std::vector> ti_kernels_; - - std::vector compiled_snode_structs_; + std::unique_ptr impl_; }; +bool is_vulkan_api_available(); + } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/spirv_ir_builder.cpp b/taichi/backends/vulkan/spirv_ir_builder.cpp index 4be026a40..37ac969d8 100644 --- a/taichi/backends/vulkan/spirv_ir_builder.cpp +++ b/taichi/backends/vulkan/spirv_ir_builder.cpp @@ -12,9 +12,9 @@ void IRBuilder::init_header() { TI_ASSERT(header_.size() == 0U); header_.push_back(spv::MagicNumber); - header_.push_back(device_->get_cap(cap::spirv_version)); + header_.push_back(device_->get_cap(cap::vk_spirv_version)); - TI_TRACE("SPIR-V Version {}", device_->get_cap(cap::spirv_version)); + TI_TRACE("SPIR-V Version {}", device_->get_cap(cap::vk_spirv_version)); // generator: set to 0, unknown header_.push_back(0U); @@ -26,48 +26,46 @@ void IRBuilder::init_header() { // capability ib_.begin(spv::OpCapability).add(spv::CapabilityShader).commit(&header_); - if (device_->get_cap(cap::spirv_has_atomic_float64_add)) { + if (device_->get_cap(cap::vk_has_atomic_float64_add)) { ib_.begin(spv::OpCapability) .add(spv::CapabilityAtomicFloat64AddEXT) .commit(&header_); } - if (device_->get_cap(cap::spirv_has_atomic_float_add)) { + if (device_->get_cap(cap::vk_has_atomic_float_add)) { ib_.begin(spv::OpCapability) .add(spv::CapabilityAtomicFloat32AddEXT) .commit(&header_); } - if (device_->get_cap(cap::spirv_has_atomic_float_minmax)) { + if (device_->get_cap(cap::vk_has_atomic_float_minmax)) { ib_.begin(spv::OpCapability) .add(spv::CapabilityAtomicFloat32MinMaxEXT) .commit(&header_); } - if (device_->get_cap(cap::spirv_has_variable_ptr)) { - /* + if (device_->get_cap(cap::vk_has_spv_variable_ptr)) { ib_.begin(spv::OpCapability) .add(spv::CapabilityVariablePointers) .commit(&header_); ib_.begin(spv::OpCapability) .add(spv::CapabilityVariablePointersStorageBuffer) .commit(&header_); - */ } - if (device_->get_cap(cap::spirv_has_int8)) { + if (device_->get_cap(cap::vk_has_int8)) { ib_.begin(spv::OpCapability).add(spv::CapabilityInt8).commit(&header_); } - if (device_->get_cap(cap::spirv_has_int16)) { + if (device_->get_cap(cap::vk_has_int16)) { ib_.begin(spv::OpCapability).add(spv::CapabilityInt16).commit(&header_); } - if (device_->get_cap(cap::spirv_has_int64)) { + if (device_->get_cap(cap::vk_has_int64)) { ib_.begin(spv::OpCapability).add(spv::CapabilityInt64).commit(&header_); } - if (device_->get_cap(cap::spirv_has_float16)) { + if (device_->get_cap(cap::vk_has_float16)) { ib_.begin(spv::OpCapability).add(spv::CapabilityFloat16).commit(&header_); } - if (device_->get_cap(cap::spirv_has_float64)) { + if (device_->get_cap(cap::vk_has_float64)) { ib_.begin(spv::OpCapability).add(spv::CapabilityFloat64).commit(&header_); } @@ -75,19 +73,19 @@ void IRBuilder::init_header() { .add("SPV_KHR_storage_buffer_storage_class") .commit(&header_); - if (device_->get_cap(cap::spirv_has_variable_ptr)) { + if (device_->get_cap(cap::vk_has_spv_variable_ptr)) { ib_.begin(spv::OpExtension) .add("SPV_KHR_variable_pointers") .commit(&header_); } - if (device_->get_cap(cap::spirv_has_atomic_float_add)) { + if (device_->get_cap(cap::vk_has_atomic_float_add)) { ib_.begin(spv::OpExtension) .add("SPV_EXT_shader_atomic_float_add") .commit(&header_); } - if (device_->get_cap(cap::spirv_has_atomic_float_minmax)) { + if (device_->get_cap(cap::vk_has_atomic_float_minmax)) { ib_.begin(spv::OpExtension) .add("SPV_EXT_shader_atomic_float_min_max") .commit(&header_); @@ -120,22 +118,22 @@ std::vector IRBuilder::finalize() { void IRBuilder::init_pre_defs() { ext_glsl450_ = ext_inst_import("GLSL.std.450"); t_bool_ = declare_primitive_type(get_data_type()); - if (device_->get_cap(cap::spirv_has_int8)) { + if (device_->get_cap(cap::vk_has_int8)) { t_int8_ = declare_primitive_type(get_data_type()); t_uint8_ = declare_primitive_type(get_data_type()); } - if (device_->get_cap(cap::spirv_has_int16)) { + if (device_->get_cap(cap::vk_has_int16)) { t_int16_ = declare_primitive_type(get_data_type()); t_uint16_ = declare_primitive_type(get_data_type()); } t_int32_ = declare_primitive_type(get_data_type()); t_uint32_ = declare_primitive_type(get_data_type()); - if (device_->get_cap(cap::spirv_has_int64)) { + if (device_->get_cap(cap::vk_has_int64)) { t_int64_ = declare_primitive_type(get_data_type()); t_uint64_ = declare_primitive_type(get_data_type()); } t_fp32_ = declare_primitive_type(get_data_type()); - if (device_->get_cap(cap::spirv_has_float64)) { + if (device_->get_cap(cap::vk_has_float64)) { t_fp64_ = declare_primitive_type(get_data_type()); } // declare void, and void functions @@ -212,35 +210,35 @@ SType IRBuilder::get_primitive_type(const DataType &dt) const { } else if (dt->is_primitive(PrimitiveTypeID::f32)) { return t_fp32_; } else if (dt->is_primitive(PrimitiveTypeID::f64)) { - if (!device_->get_cap(cap::spirv_has_float64)) + if (!device_->get_cap(cap::vk_has_float64)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_fp64_; } else if (dt->is_primitive(PrimitiveTypeID::i8)) { - if (!device_->get_cap(cap::spirv_has_int8)) + if (!device_->get_cap(cap::vk_has_int8)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_int8_; } else if (dt->is_primitive(PrimitiveTypeID::i16)) { - if (!device_->get_cap(cap::spirv_has_int16)) + if (!device_->get_cap(cap::vk_has_int16)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_int16_; } else if (dt->is_primitive(PrimitiveTypeID::i32)) { return t_int32_; } else if (dt->is_primitive(PrimitiveTypeID::i64)) { - if (!device_->get_cap(cap::spirv_has_int64)) + if (!device_->get_cap(cap::vk_has_int64)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_int64_; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { - if (!device_->get_cap(cap::spirv_has_int8)) + if (!device_->get_cap(cap::vk_has_int8)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint8_; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { - if (!device_->get_cap(cap::spirv_has_int16)) + if (!device_->get_cap(cap::vk_has_int16)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint16_; } else if (dt->is_primitive(PrimitiveTypeID::u32)) { return t_uint32_; } else if (dt->is_primitive(PrimitiveTypeID::u64)) { - if (!device_->get_cap(cap::spirv_has_int64)) + if (!device_->get_cap(cap::vk_has_int64)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint64_; } else { @@ -252,13 +250,13 @@ SType IRBuilder::get_primitive_buffer_type(const bool struct_compiled, const DataType &dt) const { if (struct_compiled) { if (dt->is_primitive(PrimitiveTypeID::f32) && - device_->get_cap(cap::spirv_has_atomic_float_add)) { + device_->get_cap(cap::vk_has_atomic_float_add)) { return t_fp32_; } else if (dt->is_primitive(PrimitiveTypeID::f64) && - device_->get_cap(cap::spirv_has_atomic_float64_add)) { + device_->get_cap(cap::vk_has_atomic_float64_add)) { return t_fp64_; } else if (dt->is_primitive(PrimitiveTypeID::i64) && - device_->get_cap(cap::spirv_has_atomic_i64)) { + device_->get_cap(cap::vk_has_atomic_i64)) { return t_int64_; } } @@ -334,7 +332,7 @@ SType IRBuilder::get_struct_array_type(const SType &value_type, .add_seq(struct_type, 0, spv::DecorationOffset, 0) .commit(&decorate_); - if (device_->get_cap(cap::spirv_version) < 0x10300) { + if (device_->get_cap(cap::vk_spirv_version) < 0x10300) { // NOTE: BufferBlock was deprecated in SPIRV 1.3 // use StorageClassStorageBuffer instead. // runtime array are always decorated as BufferBlock(shader storage buffer) @@ -354,7 +352,7 @@ Value IRBuilder::buffer_argument(const SType &value_type, // NOTE: BufferBlock was deprecated in SPIRV 1.3 // use StorageClassStorageBuffer instead. spv::StorageClass storage_class; - if (device_->get_cap(cap::spirv_version) < 0x10300) { + if (device_->get_cap(cap::vk_spirv_version) < 0x10300) { storage_class = spv::StorageClassUniform; } else { storage_class = spv::StorageClassStorageBuffer; @@ -380,7 +378,7 @@ Value IRBuilder::struct_array_access(const SType &res_type, TI_ASSERT(res_type.flag == TypeKind::kPrimitive); spv::StorageClass storage_class; - if (device_->get_cap(cap::spirv_version) < 0x10300) { + if (device_->get_cap(cap::vk_spirv_version) < 0x10300) { storage_class = spv::StorageClassUniform; } else { storage_class = spv::StorageClassStorageBuffer; @@ -910,10 +908,6 @@ void IRBuilder::init_random_function(Value global_tmp_) { _rand_y_ = new_value(local_type, ValueKind::kVariablePtr); _rand_z_ = new_value(local_type, ValueKind::kVariablePtr); _rand_w_ = new_value(local_type, ValueKind::kVariablePtr); - global_values.push_back(_rand_x_); - global_values.push_back(_rand_y_); - global_values.push_back(_rand_z_); - global_values.push_back(_rand_w_); ib_.begin(spv::OpVariable) .add_seq(local_type, _rand_x_, spv::StorageClassPrivate) .commit(&global_); diff --git a/taichi/backends/vulkan/spirv_ir_builder.h b/taichi/backends/vulkan/spirv_ir_builder.h index dcc116bff..a47a15d0b 100644 --- a/taichi/backends/vulkan/spirv_ir_builder.h +++ b/taichi/backends/vulkan/spirv_ir_builder.h @@ -332,8 +332,6 @@ class IRBuilder { return new_value(t_void_func_, ValueKind::kFunction); } - std::vector global_values; - // Declare the entry point for a kernel function void commit_kernel_function(const Value &func, const std::string &name, @@ -344,11 +342,6 @@ class IRBuilder { for (const auto &arg : args) { ib_.add(arg); } - if (device_->get_cap(DeviceCapability::spirv_version) >= 0x10400) { - for (const auto &v : global_values) { - ib_.add(v); - } - } if (gl_global_invocation_id.id != 0) { ib_.add(gl_global_invocation_id); } diff --git a/taichi/backends/vulkan/vulkan_program.cpp b/taichi/backends/vulkan/vulkan_program.cpp index 80f749456..7c03bd371 100644 --- a/taichi/backends/vulkan/vulkan_program.cpp +++ b/taichi/backends/vulkan/vulkan_program.cpp @@ -16,13 +16,8 @@ void VulkanProgramImpl::materialize_runtime(MemoryPool *memory_pool, *result_buffer_ptr = (uint64 *)memory_pool->allocate( sizeof(uint64) * taichi_result_buffer_entries, 8); - EmbeddedVulkanDevice::Params evd_params; - evd_params.api_version = VulkanEnvSettings::kApiVersion(); - embedded_device_ = std::make_unique(evd_params); - vulkan::VkRuntime::Params params; params.host_result_buffer = *result_buffer_ptr; - params.device = embedded_device_->device(); vulkan_runtime_ = std::make_unique(std::move(params)); } @@ -31,13 +26,9 @@ void VulkanProgramImpl::materialize_snode_tree( std::vector> &, std::unordered_map &, uint64 *result_buffer) { + // TODO: support materializing multiple snode trees vulkan_runtime_->materialize_snode_tree(tree); } -VulkanProgramImpl::~VulkanProgramImpl() { - vulkan_runtime_.reset(); - embedded_device_.reset(); -} - } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/vulkan_program.h b/taichi/backends/vulkan/vulkan_program.h index dd2fcf55e..ce72b87df 100644 --- a/taichi/backends/vulkan/vulkan_program.h +++ b/taichi/backends/vulkan/vulkan_program.h @@ -8,22 +8,10 @@ #include "taichi/program/snode_expr_utils.h" #include "taichi/program/program_impl.h" -#include "taichi/backends/vulkan/embedded_device.h" -#include "taichi/backends/vulkan/vulkan_utils.h" -#include "taichi/backends/vulkan/loader.h" - -#include "vk_mem_alloc.h" -#include "taichi/backends/vulkan/vulkan_device.h" - #include namespace taichi { namespace lang { - -namespace vulkan { -class EmbeddedVulkanDevice; -} - class VulkanProgramImpl : public ProgramImpl { public: VulkanProgramImpl(CompileConfig &config) : ProgramImpl(config) { @@ -58,10 +46,10 @@ class VulkanProgramImpl : public ProgramImpl { vulkan_runtime_->destroy_snode_tree(snode_tree); } - ~VulkanProgramImpl() override; + ~VulkanProgramImpl() { + } private: - std::unique_ptr embedded_device_{nullptr}; std::unique_ptr vulkan_runtime_; }; } // namespace lang diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index a2352e210..cd67d310c 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -28,7 +28,7 @@ #endif #ifdef TI_WITH_VULKAN #include "taichi/backends/vulkan/vulkan_program.h" -#include "taichi/backends/vulkan/loader.h" +#include "taichi/backends/vulkan/runtime.h" #endif #if defined(TI_ARCH_x64) diff --git a/taichi/python/export_misc.cpp b/taichi/python/export_misc.cpp index b73efbba6..bcda0684e 100644 --- a/taichi/python/export_misc.cpp +++ b/taichi/python/export_misc.cpp @@ -23,10 +23,6 @@ #include "taichi/backends/cuda/cuda_driver.h" #endif -#ifdef TI_WITH_VULKAN -#include "taichi/backends/vulkan/loader.h" -#endif - #ifdef TI_WITH_CC namespace taichi::lang::cccp { extern bool is_c_backend_available(); @@ -170,11 +166,7 @@ void export_misc(py::module &m) { m.def("with_cuda", is_cuda_api_available); m.def("with_metal", taichi::lang::metal::is_metal_api_available); m.def("with_opengl", taichi::lang::opengl::is_opengl_api_available); -#ifdef TI_WITH_VULKAN m.def("with_vulkan", taichi::lang::vulkan::is_vulkan_api_available); -#else - m.def("with_vulkan", []() { return false; }); -#endif #ifdef TI_WITH_CC m.def("with_cc", taichi::lang::cccp::is_c_backend_available);