diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index ccd420f17..ce564e2b0 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -3,12 +3,12 @@ Related issue = #
+
[](https://ci.appveyor.com/project/yuanming-hu/taichi/branch/master)
@@ -13,9 +13,9 @@
**Taichi** (太极) is a programming language designed for *high-performance computer graphics*. It is deeply embedded in **Python**, and its **just-in-time compiler** offloads compute-intensive tasks to multi-core CPUs and massively parallel GPUs.
-
+
-Advanced features of Taichi include [spatially sparse computing](https://taichi.readthedocs.io/en/latest/sparse.html) and [differentiable programming](https://taichi.readthedocs.io/en/latest/differentiable_programming.html) [[examples]](https://github.com/yuanming-hu/difftaichi).
+Advanced features of Taichi include [spatially sparse computing](https://docs.taichi.graphics/docs/lang/articles/advanced/sparse) and [differentiable programming](https://docs.taichi.graphics/docs/lang/articles/advanced/differentiable_programming) [[examples]](https://github.com/yuanming-hu/difftaichi).
**Please check out our SIGGRAPH 2020 course on Taichi basics:** [YouTube](https://youtu.be/Y0-76n3aZFA), [Bilibili](https://www.bilibili.com/video/BV1kA411n7jk/), [slides (pdf)](https://yuanming.taichi.graphics/publication/2020-taichi-tutorial/taichi-tutorial.pdf).
@@ -23,10 +23,10 @@ Advanced features of Taichi include [spatially sparse computing](https://taichi.
## Examples ([More...](misc/examples.md))
-
-
-
-
+
+
+
+
## Installation [](https://pepy.tech/project/taichi)
@@ -67,3 +67,7 @@ If you use Taichi in your research, please cite our papers:
- [Taichi Conference](https://github.com/taichi-dev/taichicon): Taichi developer conferences.
- [GAMES 201 Lectures](https://github.com/taichi-dev/games201): (Chinese) A hands-on course on building advanced physics engines, based on Taichi.
- [More...](misc/links.md)
+
+## Security
+
+Please disclose security issues responsibly by contacting contact@taichi.graphics.
diff --git a/cmake/PythonNumpyPybind11.cmake b/cmake/PythonNumpyPybind11.cmake
index bd351900a..5957afc79 100644
--- a/cmake/PythonNumpyPybind11.cmake
+++ b/cmake/PythonNumpyPybind11.cmake
@@ -14,7 +14,13 @@ endif ()
if (WIN32)
execute_process(COMMAND where ${PYTHON_EXECUTABLE}
- OUTPUT_VARIABLE PYTHON_EXECUTABLE_PATH)
+ OUTPUT_VARIABLE PYTHON_EXECUTABLE_PATHS)
+ if (PYTHON_EXECUTABLE_PATHS)
+ string(FIND "${PYTHON_EXECUTABLE_PATHS}" "\n" _LINE_BREAK_LOC)
+ string(SUBSTRING "${PYTHON_EXECUTABLE_PATHS}" 0 ${_LINE_BREAK_LOC} PYTHON_EXECUTABLE_PATH)
+ else ()
+ set(PYTHON_EXECUTABLE_PATH ${PYTHON_EXECUTABLE})
+ endif ()
else ()
execute_process(COMMAND which ${PYTHON_EXECUTABLE}
OUTPUT_VARIABLE PYTHON_EXECUTABLE_PATH)
diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake
index e6f629163..030d58c6a 100644
--- a/cmake/TaichiCXXFlags.cmake
+++ b/cmake/TaichiCXXFlags.cmake
@@ -44,11 +44,14 @@ if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}"
message("Setting -march=nehalem for x86_64 processors")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem -DTI_ARCH_x64")
endif()
+ set(ARCH "x64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_ARCH_ARM")
+ set(ARCH "arm64")
else()
message(FATAL_ERROR "Unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
endif()
+set(HOST_ARCH ${ARCH} CACHE INTERNAL "Host arch")
if (USE_STDCPP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
diff --git a/cmake/TaichiTests.cmake b/cmake/TaichiTests.cmake
index 4404a1cda..d8f594474 100644
--- a/cmake/TaichiTests.cmake
+++ b/cmake/TaichiTests.cmake
@@ -23,6 +23,15 @@ include_directories(
)
add_executable(${TESTS_NAME} ${TAICHI_TESTS_SOURCE})
+if (WIN32)
+ # Output the executable to bin/ instead of build/Debug/...
+ set(TESTS_OUTPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/bin")
+ set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${TESTS_OUTPUT_DIR})
+ set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${TESTS_OUTPUT_DIR})
+ set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${TESTS_OUTPUT_DIR})
+ set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL ${TESTS_OUTPUT_DIR})
+ set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${TESTS_OUTPUT_DIR})
+endif()
target_link_libraries(${TESTS_NAME} taichi_isolated_core)
target_link_libraries(${TESTS_NAME} gtest_main)
diff --git a/docs/lang/api/reference/_category_.json b/docs/lang/api/reference/_category_.json
new file mode 100644
index 000000000..ec4123c51
--- /dev/null
+++ b/docs/lang/api/reference/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "API Reference",
+ "position": 99
+}
diff --git a/docs/lang/articles/advanced/_category_.json b/docs/lang/articles/advanced/_category_.json
new file mode 100644
index 000000000..84fc16f93
--- /dev/null
+++ b/docs/lang/articles/advanced/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Advanced Programming",
+ "position": 3
+}
diff --git a/docs/lang/articles/basic/_category_.json b/docs/lang/articles/basic/_category_.json
new file mode 100644
index 000000000..60b7ef558
--- /dev/null
+++ b/docs/lang/articles/basic/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Taichi Language Basic Concepts",
+ "position": 2
+}
diff --git a/docs/lang/articles/contribution/_category_.json b/docs/lang/articles/contribution/_category_.json
new file mode 100644
index 000000000..74136a441
--- /dev/null
+++ b/docs/lang/articles/contribution/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Contribution Guide",
+ "position": 5
+}
diff --git a/docs/lang/articles/contribution/contributor_guide.md b/docs/lang/articles/contribution/contributor_guide.md
index 7c1adb64c..b1d8d62ff 100644
--- a/docs/lang/articles/contribution/contributor_guide.md
+++ b/docs/lang/articles/contribution/contributor_guide.md
@@ -166,7 +166,7 @@ This design is terrible.
`yapf v0.29.0` locally before you use `ti format`.
- If you don't have these formatting tools locally, feel free to
- leverage GitHub actions: simply comment `\format` in a PR
+ leverage GitHub actions: simply comment `/format` in a PR
(e.g., [#2481](https://github.com/taichi-dev/taichi/pull/2481#issuecomment-872226701))
and then [Taichi Gardener](https://github.com/taichi-gardener)
will automatically format the code for you.
diff --git a/docs/lang/articles/contribution/life_of_kernel_lowres.jpg b/docs/lang/articles/contribution/life_of_kernel_lowres.jpg
new file mode 100644
index 000000000..0f7dbc332
Binary files /dev/null and b/docs/lang/articles/contribution/life_of_kernel_lowres.jpg differ
diff --git a/docs/lang/articles/contribution/writing_cpp_tests.md b/docs/lang/articles/contribution/writing_cpp_tests.md
index d41a42037..14792216d 100644
--- a/docs/lang/articles/contribution/writing_cpp_tests.md
+++ b/docs/lang/articles/contribution/writing_cpp_tests.md
@@ -21,6 +21,11 @@ make
./taichi_cpp_tests
```
+:::note
+On Windows, `taichi_cpp_tests.exe` will be placed inside the `%TAICHI_REPO_DIR%\bin` directory,
+so you can directly run it after adding the directory to `PATH` in [Setting up Taichi for development](dev_install.md#setting-up-taichi-for-development-3).
+:::
+
:::note
Consider polishing the CPP test infrastructure:
diff --git a/docs/lang/articles/misc/_category_.json b/docs/lang/articles/misc/_category_.json
new file mode 100644
index 000000000..da3cf0599
--- /dev/null
+++ b/docs/lang/articles/misc/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Miscellaneous Topics",
+ "position": 4
+}
diff --git a/examples/rendering/cornell_box.py b/examples/rendering/cornell_box.py
index 94e916733..9560b09ee 100644
--- a/examples/rendering/cornell_box.py
+++ b/examples/rendering/cornell_box.py
@@ -1,6 +1,7 @@
import time
import numpy as np
+from numpy.lib.function_base import average
import taichi as ti
@@ -8,6 +9,7 @@
res = (800, 800)
color_buffer = ti.Vector.field(3, dtype=ti.f32, shape=res)
count_var = ti.field(ti.i32, shape=(1, ))
+tonemapped_buffer = ti.Vector.field(3, dtype=ti.f32, shape=res)
max_ray_depth = 10
eps = 1e-4
@@ -482,18 +484,34 @@ def render():
count_var[0] = (count_var[0] + 1) % (stratify_res * stratify_res)
-gui = ti.GUI('Cornell Box', res)
+@ti.kernel
+def tonemap(accumulated: ti.f32) -> ti.f32:
+ sum = 0.0
+ sum_sq = 0.0
+ for i, j in color_buffer:
+ luma = color_buffer[i, j][0] * 0.2126 + color_buffer[
+ i, j][1] * 0.7152 + color_buffer[i, j][2] * 0.0722
+ sum += luma
+ sum_sq += ti.pow(luma / accumulated, 2.0)
+ mean = sum / (res[0] * res[1])
+ var = sum_sq / (res[0] * res[1]) - ti.pow(mean / accumulated, 2.0)
+ for i, j in tonemapped_buffer:
+ tonemapped_buffer[i, j] = ti.sqrt(color_buffer[i, j] / mean * 0.6)
+ return var
+
+
+gui = ti.GUI('Cornell Box', res, fast_gui=True)
+gui.fps_limit = 300
last_t = time.time()
i = 0
while gui.running:
render()
interval = 10
- if i % interval == 0 and i > 0:
- img = color_buffer.to_numpy() * (1 / (i + 1))
- img = np.sqrt(img / img.mean() * 0.24)
+ if i % interval == 0:
+ var = tonemap(i)
print("{:.2f} samples/s ({} iters, var={})".format(
- interval / (time.time() - last_t), i, np.var(img)))
+ interval / (time.time() - last_t), i, var))
last_t = time.time()
- gui.set_image(img)
+ gui.set_image(tonemapped_buffer)
gui.show()
i += 1
diff --git a/examples/simulation/mpm128.py b/examples/simulation/mpm128.py
index a32bf420e..8dffdd7ff 100644
--- a/examples/simulation/mpm128.py
+++ b/examples/simulation/mpm128.py
@@ -148,7 +148,9 @@ def reset():
attractor_strength[None] = -1
for s in range(int(2e-3 // dt)):
substep()
- colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32)
- gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()])
+ gui.circles(x.to_numpy(),
+ radius=1.5,
+ palette=[0x068587, 0xED553B, 0xEEEEF0],
+ palette_indices=material)
gui.show(
) # Change to gui.show(f'{frame:06d}.png') to write images to disk
diff --git a/examples/simulation/mpm99.py b/examples/simulation/mpm99.py
index 9a9d8fe59..cf71c3877 100644
--- a/examples/simulation/mpm99.py
+++ b/examples/simulation/mpm99.py
@@ -122,7 +122,9 @@ def initialize():
while not gui.get_event(ti.GUI.ESCAPE, ti.GUI.EXIT):
for s in range(int(2e-3 // dt)):
substep()
- colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32)
- gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()])
+ gui.circles(x.to_numpy(),
+ radius=1.5,
+ palette=[0x068587, 0xED553B, 0xEEEEF0],
+ palette_indices=material)
gui.show(
) # Change to gui.show(f'{frame:06d}.png') to write images to disk
diff --git a/misc/ci_setup.py b/misc/ci_setup.py
index 24868c76e..1e27fa94e 100644
--- a/misc/ci_setup.py
+++ b/misc/ci_setup.py
@@ -257,12 +257,11 @@ def run(self):
execute_command('echo $PYTHONPATH')
elif get_os_name() != 'win':
# compile ..
- os.makedirs('build', exist_ok=True)
arg = environ.get('CI_SETUP_CMAKE_ARGS', '')
+ os.makedirs('build', exist_ok=True)
execute_command(
- f'cd build && cmake .. -DPYTHON_EXECUTABLE={sys.executable} {arg}'
+ f'TAICHI_CMAKE_ARGS="{arg}" {sys.executable} setup.py install --user'
)
- execute_command('cd build && make -j 10')
return
if test_installation():
print(' Successfully Installed Taichi at {}.'.format(
diff --git a/python/.gitignore b/python/.gitignore
index 7d9f12eb9..12389e8d4 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -1,5 +1,7 @@
lib
taichi.egg-info
taichi/include
-setup.py
+taichi/examples
+taichi/assets
+taichi/tests
release
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
deleted file mode 100644
index c8c81c5e8..000000000
--- a/python/MANIFEST.in
+++ /dev/null
@@ -1,12 +0,0 @@
-include MANIFEST.in
-include *.txt
-include *.py
-include *.cfg
-include taichi/*.md
-include taichi/assets/*
-include taichi/lib/*.so
-include taichi/lib/*.pyd
-include taichi/lib/*.bc
-
-global-exclude *.pyc *.pyo
-global-exclude ffmpeg
diff --git a/python/build.py b/python/build.py
index ffcd10a40..34a33a58c 100644
--- a/python/build.py
+++ b/python/build.py
@@ -5,8 +5,6 @@
import shutil
import sys
-import taichi as ti
-
def get_os_name():
name = platform.platform()
@@ -26,95 +24,33 @@ def get_python_executable():
def build(project_name):
- """Build and package the wheel file in `python/dist`"""
+ """Build and package the wheel file in root `dist` dir"""
if platform.system() == 'Linux':
if re.search("^clang\+\+-*\d*", str(os.environ.get('CXX'))) is None:
raise RuntimeError(
'Only the wheel with clang will be released to PyPI')
- version = ti.core.get_version_string()
- with open('../setup.py') as fin:
- with open('setup.py', 'w') as fout:
- print("project_name = '{}'".format(project_name), file=fout)
- print("version = '{}'".format(version), file=fout)
- for l in fin:
- print(l, file=fout, end='')
-
- print("*** project_name = '{}'".format(project_name))
-
- try:
- os.remove('taichi/CHANGELOG.md')
- except FileNotFoundError:
- pass
- shutil.rmtree('taichi/lib', ignore_errors=True)
- shutil.rmtree('taichi/tests', ignore_errors=True)
- shutil.rmtree('taichi/examples', ignore_errors=True)
- shutil.rmtree('taichi/assets', ignore_errors=True)
- os.makedirs('taichi/lib', exist_ok=True)
- shutil.rmtree('build', ignore_errors=True)
- shutil.rmtree('dist', ignore_errors=True)
- shutil.rmtree('taichi/include', ignore_errors=True)
- # shutil.copytree('../include/', 'taichi/include')
- build_dir = '../build'
-
- if get_os_name() == 'linux':
- shutil.copy('../build/libtaichi_core.so', 'taichi/lib/taichi_core.so')
- elif get_os_name() == 'osx':
- shutil.copy('../build/libtaichi_core.dylib',
- 'taichi/lib/taichi_core.so')
- else:
- shutil.copy('../runtimes/RelWithDebInfo/taichi_core.dll',
- 'taichi/lib/taichi_core.pyd')
-
- os.system(f'cd .. && {get_python_executable()} -m taichi changelog --save')
-
- try:
- with open('../CHANGELOG.md') as f:
- print(f.read())
- except FileNotFoundError:
- print('CHANGELOG.md not found')
- pass
-
- try:
- shutil.copy('../CHANGELOG.md', './taichi/CHANGELOG.md')
- except FileNotFoundError:
- pass
- shutil.copytree('../tests/python', './taichi/tests')
- shutil.copytree('../examples', './taichi/examples')
- shutil.copytree('../external/assets', './taichi/assets')
-
- if get_os_name() != 'osx':
- libdevice_path = ti.core.libdevice_path()
- print("copying libdevice:", libdevice_path)
- assert os.path.exists(libdevice_path)
- shutil.copy(libdevice_path, 'taichi/lib/slim_libdevice.10.bc')
-
- ti.core.compile_runtimes()
- runtime_dir = ti.core.get_runtime_dir()
- for f in os.listdir(runtime_dir):
- if f.startswith('runtime_') and f.endswith('.bc'):
- print(f"Fetching runtime file {f}")
- shutil.copy(os.path.join(runtime_dir, f), 'taichi/lib')
print("Using python executable", get_python_executable())
os.system(
'{} -m pip install --user --upgrade twine setuptools wheel'.format(
get_python_executable()))
+ os.system(f'{get_python_executable()} -m taichi changelog --save')
+
if get_os_name() == 'linux':
- os.system('{} setup.py bdist_wheel -p manylinux1_x86_64'.format(
- get_python_executable()))
+ os.system(
+ f'cd ..; PROJECT_NAME={project_name} {get_python_executable()} setup.py bdist_wheel -p manylinux1_x86_64'
+ )
else:
- os.system('{} setup.py bdist_wheel'.format(get_python_executable()))
+ os.system(
+ f'cd ..; PROJECT_NAME={project_name} {get_python_executable()} setup.py bdist_wheel'
+ )
- shutil.rmtree('taichi/lib')
- shutil.rmtree('taichi/tests')
- shutil.rmtree('taichi/examples')
- shutil.rmtree('taichi/assets')
try:
os.remove('taichi/CHANGELOG.md')
except FileNotFoundError:
pass
- shutil.rmtree('./build')
+ shutil.rmtree('../build')
def parse_args():
@@ -149,6 +85,8 @@ def main():
env_pypi_pwd = os.environ.get('PYPI_PWD', '')
+ shutil.rmtree('../dist', ignore_errors=True)
+
if mode == 'try_upload':
if env_pypi_pwd == '':
print("Missing environment variable PYPI_PWD")
@@ -174,16 +112,18 @@ def main():
get_python_executable(), pypi_repo, pypi_user))
elif mode == 'test':
print('Uninstalling old taichi packages...')
- os.system(f'{get_python_executable()} -m pip uninstall taichi-nightly')
- os.system(f'{get_python_executable()} -m pip uninstall taichi')
- dists = os.listdir('dist')
+ os.system(
+ f'{get_python_executable()} -m pip uninstall -y taichi-nightly')
+ os.system(f'{get_python_executable()} -m pip uninstall -y taichi')
+ dists = os.listdir('../dist')
assert len(dists) == 1
dist = dists[0]
print('Installing ', dist)
os.environ['PYTHONPATH'] = ''
os.makedirs('test_env', exist_ok=True)
- os.system('cd test_env && {} -m pip install ../dist/{} --user'.format(
- get_python_executable(), dist))
+ os.system(
+ 'cd test_env && {} -m pip install ../../dist/{} --user'.format(
+ get_python_executable(), dist))
print('Entering test environment...')
if get_os_name() == 'win':
os.system(
diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py
index 97678d9c0..dcfd0edec 100644
--- a/python/taichi/lang/__init__.py
+++ b/python/taichi/lang/__init__.py
@@ -404,7 +404,8 @@ def visit(node):
from taichi.lang.meta import clear_gradients
clear_gradients(places)
- visit(ti.root)
+ for root_fb in FieldsBuilder.finalized_roots():
+ visit(root_fb)
def benchmark(func, repeat=300, args=()):
diff --git a/python/taichi/lang/expr.py b/python/taichi/lang/expr.py
index b999bc202..62386afe2 100644
--- a/python/taichi/lang/expr.py
+++ b/python/taichi/lang/expr.py
@@ -141,6 +141,10 @@ def snode(self):
def __hash__(self):
return self.ptr.get_raw_address()
+ @property
+ def name(self):
+ return self.snode.name
+
@property
def shape(self):
if self.ptr.is_external_var():
diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py
index e3d010be0..819b0e5aa 100644
--- a/python/taichi/lang/impl.py
+++ b/python/taichi/lang/impl.py
@@ -11,6 +11,7 @@
from taichi.lang.util import (cook_dtype, is_taichi_class, python_scope,
taichi_scope)
from taichi.misc.util import deprecated, get_traceback, warning
+from taichi.snode.fields_builder import FieldsBuilder
import taichi as ti
@@ -127,6 +128,14 @@ def subscript(value, *indices):
raise TypeError(
'Subscription (e.g., "a[i, j]") only works on fields or external arrays.'
)
+ if not value.ptr.is_external_var() and value.ptr.snode() is None:
+ if not value.ptr.is_primal():
+ raise RuntimeError(
+ f"Gradient {value.ptr.get_expr_name()} has not been placed, check whether `needs_grad=True`"
+ )
+ else:
+ raise RuntimeError(
+ f"{value.ptr.get_expr_name()} has not been placed.")
field_dim = int(value.ptr.get_attribute("dim"))
else:
# When reading bit structure we only support the 0-D case for now.
@@ -237,16 +246,20 @@ def create_program(self):
if self.prog is None:
self.prog = _ti_core.Program()
+ def materialize_root_fb(self, first):
+ if (not root.finalized and not root.empty) or first:
+ root.finalize()
+
+ if root.finalized:
+ global _root_fb
+ _root_fb = FieldsBuilder()
+
def materialize(self):
+ self.materialize_root_fb(not self.materialized)
+
if self.materialized:
return
- print('[Taichi] materializing...')
- self.create_program()
-
- if not root.finalized:
- root.finalize()
-
self.materialized = True
not_placed = []
for var in self.global_vars:
@@ -424,7 +437,7 @@ def var(dt, shape=None, offset=None, needs_grad=False):
@python_scope
-def field(dtype, shape=None, offset=None, needs_grad=False):
+def field(dtype, shape=None, name="", offset=None, needs_grad=False):
_taichi_skip_traceback = 1
dtype = cook_dtype(dtype)
@@ -449,6 +462,7 @@ def field(dtype, shape=None, offset=None, needs_grad=False):
x = Expr(_ti_core.make_id_expr(""))
x.declaration_tb = get_traceback(stacklevel=2)
x.ptr = _ti_core.global_new(x.ptr, dtype)
+ x.ptr.set_name(name)
x.ptr.set_is_primal(True)
pytaichi.global_vars.append(x)
@@ -456,6 +470,7 @@ def field(dtype, shape=None, offset=None, needs_grad=False):
# adjoint
x_grad = Expr(_ti_core.make_id_expr(""))
x_grad.ptr = _ti_core.global_new(x_grad.ptr, dtype)
+ x_grad.ptr.set_name(name + ".grad")
x_grad.ptr.set_is_primal(False)
x.set_grad(x_grad)
@@ -502,7 +517,13 @@ def vars2entries(vars):
if hasattr(var, '__ti_repr__'):
res = var.__ti_repr__()
elif isinstance(var, (list, tuple)):
- res = list_ti_repr(var)
+ res = var
+ # If the first element is '__ti_format__', this list is the result of ti_format.
+ if len(var) > 0 and isinstance(
+ var[0], str) and var[0] == '__ti_format__':
+ res = var[1:]
+ else:
+ res = list_ti_repr(var)
else:
yield var
continue
@@ -537,6 +558,35 @@ def fused_string(entries):
_ti_core.create_print(contentries)
+@taichi_scope
+def ti_format(*args):
+ content = args[0]
+ mixed = args[1:]
+ new_mixed = []
+ args = []
+ for x in mixed:
+ if isinstance(x, ti.Expr):
+ new_mixed.append('{}')
+ args.append(x)
+ else:
+ new_mixed.append(x)
+
+ try:
+ content = content.format(*new_mixed)
+ except ValueError:
+ print('Number formatting is not supported with Taichi fields')
+ exit(1)
+ res = content.split('{}')
+ assert len(res) == len(
+ args
+ ) + 1, 'Number of args is different from number of positions provided in string'
+
+ for i in range(len(args)):
+ res.insert(i * 2 + 1, args[i])
+ res.insert(0, '__ti_format__')
+ return res
+
+
@taichi_scope
def ti_assert(cond, msg, extra_args):
# Mostly a wrapper to help us convert from Expr (defined in Python) to
diff --git a/python/taichi/lang/kernel_impl.py b/python/taichi/lang/kernel_impl.py
index eb2a07c38..2f648e4ec 100644
--- a/python/taichi/lang/kernel_impl.py
+++ b/python/taichi/lang/kernel_impl.py
@@ -351,8 +351,7 @@ def materialize(self, key=None, args=None, arg_features=None):
_taichi_skip_traceback = 1
if key is None:
key = (self.func, 0)
- if not self.runtime.materialized:
- self.runtime.materialize()
+ self.runtime.materialize()
if key in self.compiled_functions:
return
grad_suffix = ""
diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py
index 46187c54f..42362f1b8 100644
--- a/python/taichi/lang/matrix.py
+++ b/python/taichi/lang/matrix.py
@@ -585,6 +585,10 @@ def shape(self):
def dim(self):
return len(self.shape)
+ @property
+ def name(self):
+ return self.loop_range().name
+
@property
def dtype(self):
return self.loop_range().dtype
@@ -807,6 +811,7 @@ def field(cls,
m,
dtype,
shape=None,
+ name="",
offset=None,
needs_grad=False,
layout=None): # TODO(archibate): deprecate layout
@@ -825,17 +830,17 @@ def field(cls,
dtype
) == n, f'Please set correct dtype list for Vector. The shape of dtype list should be ({n}, ) instead of {np.shape(dtype)}'
for i in range(n):
- self.entries.append(impl.field(dtype[i]))
+ self.entries.append(impl.field(dtype[i], name=name))
else:
assert len(np.shape(dtype)) == 2 and len(dtype) == n and len(
dtype[0]
) == m, f'Please set correct dtype list for Matrix. The shape of dtype list should be ({n}, {m}) instead of {np.shape(dtype)}'
for i in range(n):
for j in range(m):
- self.entries.append(impl.field(dtype[i][j]))
+ self.entries.append(impl.field(dtype[i][j], name=name))
else:
for _ in range(n * m):
- self.entries.append(impl.field(dtype))
+ self.entries.append(impl.field(dtype, name=name))
self.grad = self.make_grad()
if layout is not None:
diff --git a/python/taichi/lang/meta.py b/python/taichi/lang/meta.py
index 0a20bb365..73250fd41 100644
--- a/python/taichi/lang/meta.py
+++ b/python/taichi/lang/meta.py
@@ -1,26 +1,28 @@
from taichi.core import settings
from taichi.lang import impl
from taichi.lang.expr import Expr
+from taichi.lang.kernel_arguments import ext_arr, template
+from taichi.lang.kernel_impl import kernel
import taichi as ti
# A set of helper (meta)functions
-@ti.kernel
-def fill_tensor(tensor: ti.template(), val: ti.template()):
+@kernel
+def fill_tensor(tensor: template(), val: template()):
for I in ti.grouped(tensor):
tensor[I] = val
-@ti.kernel
-def tensor_to_ext_arr(tensor: ti.template(), arr: ti.ext_arr()):
+@kernel
+def tensor_to_ext_arr(tensor: template(), arr: ext_arr()):
for I in ti.grouped(tensor):
arr[I] = tensor[I]
-@ti.kernel
-def vector_to_fast_image(img: ti.template(), out: ti.ext_arr()):
+@kernel
+def vector_to_fast_image(img: template(), out: ext_arr()):
# FIXME: Why is ``for i, j in img:`` slower than:
for i, j in ti.ndrange(*img.shape):
r, g, b = 0, 0, 0
@@ -45,8 +47,8 @@ def vector_to_fast_image(img: ti.template(), out: ti.ext_arr()):
out[idx] = (b << 16) + (g << 8) + r + alpha
-@ti.kernel
-def tensor_to_image(tensor: ti.template(), arr: ti.ext_arr()):
+@kernel
+def tensor_to_image(tensor: template(), arr: ext_arr()):
for I in ti.grouped(tensor):
t = ti.cast(tensor[I], ti.f32)
arr[I, 0] = t
@@ -54,8 +56,8 @@ def tensor_to_image(tensor: ti.template(), arr: ti.ext_arr()):
arr[I, 2] = t
-@ti.kernel
-def vector_to_image(mat: ti.template(), arr: ti.ext_arr()):
+@kernel
+def vector_to_image(mat: template(), arr: ext_arr()):
for I in ti.grouped(mat):
for p in ti.static(range(mat.n)):
arr[I, p] = ti.cast(mat[I][p], ti.f32)
@@ -63,21 +65,20 @@ def vector_to_image(mat: ti.template(), arr: ti.ext_arr()):
arr[I, 2] = 0
-@ti.kernel
-def tensor_to_tensor(tensor: ti.template(), other: ti.template()):
+@kernel
+def tensor_to_tensor(tensor: template(), other: template()):
for I in ti.grouped(tensor):
tensor[I] = other[I]
-@ti.kernel
-def ext_arr_to_tensor(arr: ti.ext_arr(), tensor: ti.template()):
+@kernel
+def ext_arr_to_tensor(arr: ext_arr(), tensor: template()):
for I in ti.grouped(tensor):
tensor[I] = arr[I]
-@ti.kernel
-def matrix_to_ext_arr(mat: ti.template(), arr: ti.ext_arr(),
- as_vector: ti.template()):
+@kernel
+def matrix_to_ext_arr(mat: template(), arr: ext_arr(), as_vector: template()):
for I in ti.grouped(mat):
for p in ti.static(range(mat.n)):
for q in ti.static(range(mat.m)):
@@ -87,9 +88,8 @@ def matrix_to_ext_arr(mat: ti.template(), arr: ti.ext_arr(),
arr[I, p, q] = mat[I][p, q]
-@ti.kernel
-def ext_arr_to_matrix(arr: ti.ext_arr(), mat: ti.template(),
- as_vector: ti.template()):
+@kernel
+def ext_arr_to_matrix(arr: ext_arr(), mat: template(), as_vector: template()):
for I in ti.grouped(mat):
for p in ti.static(range(mat.n)):
for q in ti.static(range(mat.m)):
@@ -99,36 +99,36 @@ def ext_arr_to_matrix(arr: ti.ext_arr(), mat: ti.template(),
mat[I][p, q] = arr[I, p, q]
-@ti.kernel
-def clear_gradients(vars: ti.template()):
+@kernel
+def clear_gradients(vars: template()):
for I in ti.grouped(Expr(vars[0])):
for s in ti.static(vars):
Expr(s)[I] = 0
-@ti.kernel
-def clear_loss(l: ti.template()):
+@kernel
+def clear_loss(l: template()):
# Using SNode writers would result in a forced sync, therefore we wrap these
# writes into a kernel.
l[None] = 0
l.grad[None] = 1
-@ti.kernel
-def fill_matrix(mat: ti.template(), vals: ti.template()):
+@kernel
+def fill_matrix(mat: template(), vals: template()):
for I in ti.grouped(mat):
for p in ti.static(range(mat.n)):
for q in ti.static(range(mat.m)):
mat[I][p, q] = vals[p][q]
-@ti.kernel
-def snode_deactivate(b: ti.template()):
+@kernel
+def snode_deactivate(b: template()):
for I in ti.grouped(b):
ti.deactivate(b, I)
-@ti.kernel
-def snode_deactivate_dynamic(b: ti.template()):
+@kernel
+def snode_deactivate_dynamic(b: template()):
for I in ti.grouped(b.parent()):
ti.deactivate(b, I)
diff --git a/python/taichi/lang/ops.py b/python/taichi/lang/ops.py
index 6ea9fa84a..4ceac5579 100644
--- a/python/taichi/lang/ops.py
+++ b/python/taichi/lang/ops.py
@@ -526,7 +526,6 @@ def external_func_call(func, args=[], outputs=[]):
def asm(source, inputs=[], outputs=[]):
-
_ti_core.insert_external_func_call(0, source, make_expr_group(inputs),
make_expr_group(outputs))
@@ -567,11 +566,11 @@ def rescale_index(a, b, I):
"""
assert isinstance(a, Expr) and a.is_global(), \
- f"first arguement must be a field"
+ f"first argument must be a field"
assert isinstance(b, Expr) and b.is_global(), \
- f"second arguement must be a field"
+ f"second argument must be a field"
assert isinstance(I, matrix.Matrix) and not I.is_global(), \
- f"third arguement must be a grouped index"
+ f"third argument must be a grouped index"
Ib = I.copy()
for n in range(min(I.n, min(len(a.shape), len(b.shape)))):
if a.shape[n] > b.shape[n]:
diff --git a/python/taichi/lang/snode.py b/python/taichi/lang/snode.py
index 7086d3f88..e782cbde2 100644
--- a/python/taichi/lang/snode.py
+++ b/python/taichi/lang/snode.py
@@ -140,6 +140,10 @@ def get_shape(self, i):
def loop_range(self):
return Expr(_ti_core.global_var_expr_from_snode(self.ptr))
+ @property
+ def name(self):
+ return self.ptr.name()
+
@deprecated('x.snode()', 'x.snode')
def __call__(self): # TODO: remove this after v0.7.0
return self
diff --git a/python/taichi/lang/transformer.py b/python/taichi/lang/transformer.py
index a3a52b9ef..b256b8e92 100644
--- a/python/taichi/lang/transformer.py
+++ b/python/taichi/lang/transformer.py
@@ -4,6 +4,7 @@
from taichi.lang import impl
from taichi.lang.ast_resolver import ASTResolver
from taichi.lang.exception import TaichiSyntaxError
+from taichi.lang.kernel_arguments import ext_arr, template
from taichi.lang.util import to_taichi_type
import taichi as ti
@@ -625,6 +626,11 @@ def visit_Call(self, node):
if not ASTResolver.resolve_to(node.func, ti.static, globals()):
# Do not apply the generic visitor if the function called is ti.static
self.generic_visit(node)
+ if isinstance(node.func, ast.Attribute):
+ attr_name = node.func.attr
+ if attr_name == 'format':
+ node.args.insert(0, node.func.value)
+ node.func = self.parse_expr('ti.ti_format')
if isinstance(node.func, ast.Name):
func_name = node.func.id
if func_name == 'print':
@@ -679,9 +685,9 @@ def transform_as_kernel():
for i, arg in enumerate(args.args):
# Directly pass in template arguments,
# such as class instances ("self"), fields, SNodes, etc.
- if isinstance(self.func.argument_annotations[i], ti.template):
+ if isinstance(self.func.argument_annotations[i], template):
continue
- if isinstance(self.func.argument_annotations[i], ti.ext_arr):
+ if isinstance(self.func.argument_annotations[i], ext_arr):
arg_init = self.parse_stmt(
'x = ti.lang.kernel_arguments.decl_ext_arr_arg(0, 0)')
arg_init.targets[0].id = arg.arg
@@ -725,8 +731,7 @@ def transform_as_kernel():
for i, arg in enumerate(args.args):
# Directly pass in template arguments,
# such as class instances ("self"), fields, SNodes, etc.
- if isinstance(self.func.argument_annotations[i],
- ti.template):
+ if isinstance(self.func.argument_annotations[i], template):
continue
# Create a copy for non-template arguments,
# so that they are passed by value.
diff --git a/python/taichi/misc/gui.py b/python/taichi/misc/gui.py
index 073f8c280..deb69e9b1 100644
--- a/python/taichi/misc/gui.py
+++ b/python/taichi/misc/gui.py
@@ -210,7 +210,12 @@ def set_image(self, img):
def circle(self, pos, color=0xFFFFFF, radius=1):
self.canvas.circle_single(pos[0], pos[1], color, radius)
- def circles(self, pos, color=0xFFFFFF, radius=1):
+ def circles(self,
+ pos,
+ radius=1,
+ color=0xFFFFFF,
+ palette=None,
+ palette_indices=None):
n = pos.shape[0]
if len(pos.shape) == 3:
assert pos.shape[2] == 1
@@ -235,6 +240,40 @@ def circles(self, pos, color=0xFFFFFF, radius=1):
raise ValueError(
'Color must be an ndarray or int (e.g., 0x956333)')
+ if palette is not None:
+ assert palette_indices is not None, 'palette must be used together with palette_indices'
+
+ from taichi.lang.expr import Expr
+
+ if isinstance(palette_indices, Expr):
+ ind_int = palette_indices.to_numpy().astype(np.uint32)
+ elif isinstance(palette_indices, list) or isinstance(
+ palette_indices, np.ndarray):
+ ind_int = np.array(palette_indices).astype(np.uint32)
+ else:
+ try:
+ ind_int = np.array(palette_indices)
+ except Exception:
+ raise TypeError(
+ 'palette_indices must be a type that can be converted to numpy.ndarray'
+ )
+
+ assert issubclass(
+ ind_int.dtype.type,
+ np.integer), 'palette_indices must be an integer array'
+ assert ind_int.shape == (
+ n,
+ ), 'palette_indices must be in 1-d shape with shape (num_particles, )'
+ assert min(
+ ind_int
+ ) >= 0, 'the min of palette_indices must not be less than zero'
+ assert max(ind_int) < len(
+ palette
+ ), 'the max of palette_indices must not exceed the length of palette'
+ color_array = np.array(palette, dtype=np.uint32)[ind_int]
+ color_array = np.ascontiguousarray(color_array)
+ color_array = color_array.ctypes.data
+
if isinstance(radius, np.ndarray):
assert radius.shape == (n, )
radius = np.ascontiguousarray(radius.astype(np.float32))
diff --git a/python/taichi/snode/fields_builder.py b/python/taichi/snode/fields_builder.py
index 41d688ae8..f87209d1c 100644
--- a/python/taichi/snode/fields_builder.py
+++ b/python/taichi/snode/fields_builder.py
@@ -5,6 +5,7 @@
from taichi.core.util import ti_core as _ti_core
from taichi.lang import impl, snode
from taichi.lang.exception import InvalidOperationError
+from taichi.misc.util import warning
_snode_registry = _ti_core.SNodeRegistry()
@@ -37,6 +38,16 @@ def __init__(self):
self._ptr = _snode_registry.create_root()
self._root = snode.SNode(self._ptr)
self._finalized = False
+ self._empty = True
+
+ @classmethod
+ def finalized_roots(cls):
+ roots_ptr = []
+ size = impl.get_runtime().prog.get_snode_tree_size()
+ for i in range(size):
+ res = impl.get_runtime().prog.get_snode_root(i)
+ roots_ptr.append(snode.SNode(res))
+ return roots_ptr
@property
def ptr(self):
@@ -46,16 +57,26 @@ def ptr(self):
def root(self):
return self._root
+ @property
+ def empty(self):
+ return self._empty
+
+ @property
+ def finalized(self):
+ return self._finalized
+
def dense(self, indices: Union[Sequence[_Axis], _Axis],
dimensions: Union[Sequence[int], int]):
"""Same as :func:`taichi.SNode.dense`"""
self._check_not_finalized()
+ self._empty = False
return self._root.dense(indices, dimensions)
def pointer(self, indices: Union[Sequence[_Axis], _Axis],
dimensions: Union[Sequence[int], int]):
"""Same as :func:`taichi.SNode.pointer`"""
self._check_not_finalized()
+ self._empty = False
return self._root.pointer(indices, dimensions)
def hash(self, indices, dimensions):
@@ -67,23 +88,27 @@ def dynamic(self,
chunk_size: Optional[int] = None):
"""Same as :func:`taichi.SNode.dynamic`"""
self._check_not_finalized()
+ self._empty = False
return self._root.dynamic(index, dimension, chunk_size)
def bitmasked(self, indices: Union[Sequence[_Axis], _Axis],
dimensions: Union[Sequence[int], int]):
"""Same as :func:`taichi.SNode.bitmasked`"""
self._check_not_finalized()
+ self._empty = False
return self._root.bitmasked(indices, dimensions)
def bit_struct(self, num_bits: int):
"""Same as :func:`taichi.SNode.bit_struct`"""
self._check_not_finalized()
+ self._empty = False
return self._root.bit_struct(num_bits)
def bit_array(self, indices: Union[Sequence[_Axis], _Axis],
dimensions: Union[Sequence[int], int], num_bits: int):
"""Same as :func:`taichi.SNode.bit_array`"""
self._check_not_finalized()
+ self._empty = False
return self._root.bit_array(indices, dimensions, num_bits)
def place(self,
@@ -92,17 +117,21 @@ def place(self,
shared_exponent: bool = False):
"""Same as :func:`taichi.SNode.place`"""
self._check_not_finalized()
+ self._empty = False
self._root.place(*args, offset=offset, shared_exponent=shared_exponent)
def lazy_grad(self):
"""Same as :func:`taichi.SNode.lazy_grad`"""
# TODO: This complicates the implementation. Figure out why we need this
self._check_not_finalized()
+ self._empty = False
self._root.lazy_grad()
def finalize(self):
"""Constructs the SNodeTree and finalizes this builder."""
self._check_not_finalized()
+ if self._empty:
+ warning("Finalizing an empty FieldsBuilder!")
_ti_core.finalize_snode_tree(_snode_registry, self._ptr,
impl.get_runtime().prog)
self._finalized = True
@@ -110,7 +139,3 @@ def finalize(self):
def _check_not_finalized(self):
if self._finalized:
raise InvalidOperationError('FieldsBuilder finalized')
-
- @property
- def finalized(self):
- return self._finalized
diff --git a/python/taichi/torch_io.py b/python/taichi/torch_io.py
index 30794729a..8a800a151 100644
--- a/python/taichi/torch_io.py
+++ b/python/taichi/torch_io.py
@@ -1,14 +1,15 @@
-import taichi as ti
+from taichi.lang.kernel_arguments import ext_arr, template
+from taichi.lang.kernel_impl import kernel
-@ti.kernel
-def from_torch_template(expr: ti.template(), torch_tensor: ti.ext_arr()):
+@kernel
+def from_torch_template(expr: template(), torch_tensor: ext_arr()):
for i in expr:
expr[i] = torch_tensor[i]
-@ti.kernel
-def to_torch_template(expr: ti.template(), torch_tensor: ti.ext_arr()):
+@kernel
+def to_torch_template(expr: template(), torch_tensor: ext_arr()):
for i in expr:
torch_tensor[i] = expr[i]
diff --git a/python/setup.cfg b/setup.cfg
similarity index 100%
rename from python/setup.cfg
rename to setup.cfg
diff --git a/setup.py b/setup.py
index cad2a6d6d..06f5a41f7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,15 @@
import glob
+import multiprocessing
+import os
+import platform
+import shutil
+import subprocess
+import sys
-import setuptools
+from setuptools import Extension, find_packages, setup
+from setuptools.command.build_ext import build_ext
+from setuptools.command.build_py import build_py
+from setuptools.command.egg_info import egg_info
classifiers = [
'Development Status :: 2 - Pre-Alpha',
@@ -16,35 +25,187 @@
'Programming Language :: Python :: 3.9',
]
+project_name = os.getenv('PROJECT_NAME', 'taichi')
+TI_VERSION_MAJOR = 0
+TI_VERSION_MINOR = 7
+TI_VERSION_PATCH = 27
+version = f'{TI_VERSION_MAJOR}.{TI_VERSION_MINOR}.{TI_VERSION_PATCH}'
+
data_files = glob.glob('python/lib/*')
print(data_files)
-packages = setuptools.find_packages() + ['taichi.examples']
+packages = find_packages('python')
print(packages)
-setuptools.setup(name=project_name,
- packages=packages,
- version=version,
- description='The Taichi Programming Language',
- author='Taichi developers',
- author_email='yuanmhu@gmail.com',
- url='https://github.com/taichi-dev/taichi',
- install_requires=[
- 'numpy',
- 'pybind11>=2.5.0',
- 'sourceinspect>=0.0.4',
- 'colorama',
- 'astor',
- ],
- data_files=[('lib', data_files)],
- keywords=['graphics', 'simulation'],
- license='MIT',
- include_package_data=True,
- entry_points={
- 'console_scripts': [
- 'ti=taichi.main:main',
- ],
- },
- classifiers=classifiers,
- has_ext_modules=lambda: True)
-
-# Note: this is a template setup.py used by python/build.py
+# Our python package root dir is python/
+package_dir = 'python'
+
+
+def get_python_executable():
+ return sys.executable.replace('\\', '/')
+
+
+def get_os_name():
+ name = platform.platform()
+ # in python 3.8, platform.platform() uses mac_ver() on macOS
+ # it will return 'macOS-XXXX' instead of 'Darwin-XXXX'
+ if name.lower().startswith('darwin') or name.lower().startswith('macos'):
+ return 'osx'
+ elif name.lower().startswith('windows'):
+ return 'win'
+ elif name.lower().startswith('linux'):
+ return 'linux'
+ assert False, "Unknown platform name %s" % name
+
+
+def remove_tmp(taichi_dir):
+ shutil.rmtree(os.path.join(taichi_dir, 'assets'), ignore_errors=True)
+ shutil.rmtree(os.path.join(taichi_dir, 'examples'), ignore_errors=True)
+ shutil.rmtree(os.path.join(taichi_dir, 'tests'), ignore_errors=True)
+
+
+class CMakeExtension(Extension):
+ def __init__(self, name):
+ Extension.__init__(self, name, sources=[])
+
+
+class EggInfo(egg_info):
+ def run(self):
+ taichi_dir = os.path.join(package_dir, 'taichi')
+ remove_tmp(taichi_dir)
+ shutil.rmtree('build', ignore_errors=True)
+
+ shutil.copytree('tests/python', os.path.join(taichi_dir, 'tests'))
+ shutil.copytree('examples', os.path.join(taichi_dir, 'examples'))
+ shutil.copytree('external/assets', os.path.join(taichi_dir, 'assets'))
+
+ egg_info.run(self)
+
+
+# python setup.py build runs the following commands in order:
+# python setup.py build_py
+# python setup.py build_ext
+class BuildPy(build_py):
+ def run(self):
+ build_py.run(self)
+ taichi_dir = os.path.join(package_dir, 'taichi')
+ remove_tmp(taichi_dir)
+
+
+class CMakeBuild(build_ext):
+ def parse_cmake_args_from_env(self):
+ # Source: TAICHI_CMAKE_ARGS=... python setup.py ...
+ cmake_args = os.getenv('TAICHI_CMAKE_ARGS', '')
+ return cmake_args.strip().split()
+
+ def run(self):
+ try:
+ out = subprocess.check_output(['cmake', '--version'])
+ except OSError:
+ raise RuntimeError(
+ "CMake must be installed to build the following extensions: " +
+ ", ".join(e.name for e in self.extensions))
+
+ # CMakeLists.txt is in the same directory as this setup.py file
+ cmake_list_dir = os.path.abspath(os.path.dirname(__file__))
+ self.build_temp = os.path.join(cmake_list_dir, 'build')
+
+ build_directory = os.path.abspath(self.build_temp)
+
+ cmake_args = self.parse_cmake_args_from_env()
+
+ cmake_args += [
+ f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={build_directory}',
+ f'-DPYTHON_EXECUTABLE={get_python_executable()}',
+ f'-DTI_VERSION_MAJOR={TI_VERSION_MAJOR}',
+ f'-DTI_VERSION_MINOR={TI_VERSION_MINOR}',
+ f'-DTI_VERSION_PATCH={TI_VERSION_PATCH}',
+ ]
+
+ cfg = 'Debug' if self.debug else 'Release'
+ build_args = ['--config', cfg]
+
+ cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
+
+ # Assuming Makefiles
+ build_args += ['--', f'-j{multiprocessing.cpu_count()}']
+
+ self.build_args = build_args
+
+ env = os.environ.copy()
+ os.makedirs(self.build_temp, exist_ok=True)
+
+ print('-' * 10, 'Running CMake prepare', '-' * 40)
+ subprocess.check_call(['cmake', cmake_list_dir] + cmake_args,
+ cwd=self.build_temp,
+ env=env)
+
+ print('-' * 10, 'Building extensions', '-' * 40)
+ cmake_cmd = ['cmake', '--build', '.'] + self.build_args
+ subprocess.check_call(cmake_cmd, cwd=self.build_temp)
+
+ self.prepare_package()
+
+ def prepare_package(self):
+ # We need to make sure these additional files are ready for
+ # - develop mode: must exist in local python/taichi/lib/ folder
+ # - install mode: must exist in self.build_lib/taichi/lib
+ taichi_lib_dir = 'taichi/lib'
+ for target in (
+ os.path.join(package_dir, taichi_lib_dir),
+ os.path.join(self.build_lib, taichi_lib_dir),
+ ):
+ shutil.rmtree(target, ignore_errors=True)
+ os.makedirs(target)
+ if get_os_name() == 'linux':
+ shutil.copy(os.path.join(self.build_temp, 'libtaichi_core.so'),
+ os.path.join(target, 'taichi_core.so'))
+ elif get_os_name() == 'osx':
+ shutil.copy(
+ os.path.join(self.build_temp, 'libtaichi_core.dylib'),
+ os.path.join(target, 'taichi_core.so'))
+ else:
+ shutil.copy('../runtimes/RelWithDebInfo/taichi_core.dll',
+ os.path.join(target, 'taichi_core.pyd'))
+
+ if get_os_name() != 'osx':
+ libdevice_path = 'external/cuda_libdevice/slim_libdevice.10.bc'
+ print("copying libdevice:", libdevice_path)
+ assert os.path.exists(libdevice_path)
+ shutil.copy(libdevice_path,
+ os.path.join(target, 'slim_libdevice.10.bc'))
+
+ llvm_runtime_dir = 'taichi/runtime/llvm'
+ for f in os.listdir(llvm_runtime_dir):
+ if f.startswith('runtime_') and f.endswith('.bc'):
+ print(f"Fetching runtime file {f} to {target} folder")
+ shutil.copy(os.path.join(llvm_runtime_dir, f), target)
+
+
+setup(name=project_name,
+ packages=packages,
+ package_dir={"": package_dir},
+ version=version,
+ description='The Taichi Programming Language',
+ author='Taichi developers',
+ author_email='yuanmhu@gmail.com',
+ url='https://github.com/taichi-dev/taichi',
+ install_requires=[
+ 'numpy',
+ 'pybind11>=2.5.0',
+ 'sourceinspect>=0.0.4',
+ 'colorama',
+ 'astor',
+ ],
+ data_files=[('lib', data_files)],
+ keywords=['graphics', 'simulation'],
+ license='MIT',
+ include_package_data=True,
+ entry_points={
+ 'console_scripts': [
+ 'ti=taichi.main:main',
+ ],
+ },
+ classifiers=classifiers,
+ ext_modules=[CMakeExtension('taichi_core')],
+ cmdclass=dict(egg_info=EggInfo, build_py=BuildPy, build_ext=CMakeBuild),
+ has_ext_modules=lambda: True)
diff --git a/taichi/backends/metal/shaders/runtime_structs.metal.h b/taichi/backends/metal/shaders/runtime_structs.metal.h
index 171e8463f..9b3347809 100644
--- a/taichi/backends/metal/shaders/runtime_structs.metal.h
+++ b/taichi/backends/metal/shaders/runtime_structs.metal.h
@@ -117,7 +117,7 @@ STR(
int32_t start = 0;
int32_t num_bits = 0;
int32_t acc_offset = 0;
- int32_t num_elements = 0;
+ int32_t num_elements_from_root = 0;
};
Extractor extractors[kTaichiMaxNumIndices];
diff --git a/taichi/backends/vulkan/codegen_vulkan.cpp b/taichi/backends/vulkan/codegen_vulkan.cpp
new file mode 100644
index 000000000..8a6b9f4bf
--- /dev/null
+++ b/taichi/backends/vulkan/codegen_vulkan.cpp
@@ -0,0 +1,795 @@
+#include "taichi/backends/vulkan/codegen_vulkan.h"
+
+#include <string>
+#include <vector>
+
+#include "taichi/program/program.h"
+#include "taichi/program/kernel.h"
+#include "taichi/ir/statements.h"
+#include "taichi/ir/ir.h"
+#include "taichi/util/line_appender.h"
+#include "taichi/backends/vulkan/kernel_utils.h"
+#include "taichi/backends/vulkan/runtime.h"
+#include "taichi/backends/opengl/opengl_data_types.h"
+#include "taichi/ir/transforms.h"
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+namespace {
+
+constexpr char kRootBufferName[] = "root_buffer";
+constexpr char kGlobalTmpsBufferName[] = "global_tmps_buffer";
+constexpr char kContextBufferName[] = "context_buffer";
+
+constexpr char kGlobalInvocationIDName[] = "int(gl_GlobalInvocationID.x)";
+constexpr char kLinearLoopIndexName[] = "linear_loop_idx_";
+
+constexpr int kMaxNumThreadsGridStrideLoop = 65536;
+
+#define TI_INSIDE_VULKAN_CODEGEN
+#include "taichi/backends/vulkan/shaders/atomics.glsl.h"
+#undef TI_INSIDE_VULKAN_CODEGEN
+
+using opengl::opengl_data_type_name;
+using BuffersEnum = TaskAttributes::Buffers;
+using BufferBind = TaskAttributes::BufferBind;
+
+std::string buffer_instance_name(BuffersEnum b) {
+ // https://www.khronos.org/opengl/wiki/Interface_Block_(GLSL)#Syntax
+ switch (b) {
+ case BuffersEnum::Root:
+ return kRootBufferName;
+ case BuffersEnum::GlobalTmps:
+ return kGlobalTmpsBufferName;
+ case BuffersEnum::Context:
+ return kContextBufferName;
+ default:
+ TI_NOT_IMPLEMENTED;
+ break;
+ }
+ return {};
+}
+
+std::string store_as_int_bits(const std::string &in, DataType dt) {
+ if (dt->is_primitive(PrimitiveTypeID::f32)) {
+ return fmt::format("floatBitsToInt({})", in);
+ }
+ return in;
+}
+
+std::string load_from_int_bits(const std::string &in, DataType dt) {
+ if (dt->is_primitive(PrimitiveTypeID::f32)) {
+ return fmt::format("intBitsToFloat({})", in);
+ }
+ return in;
+}
+
+std::string vk_data_address_shifter(const Stmt *s, DataType) {
+ // Hardcoded ">> 2" because we only support 32-bit for now.
+ return fmt::format("({} >> 2)", s->raw_name());
+}
+
+class TaskCodegen : public IRVisitor {
+ private:
+ enum class Section {
+ Headers,
+ Kernels,
+ };
+
+ static constexpr Section kAllSections[] = {
+ Section::Headers,
+ Section::Kernels,
+ };
+
+ public:
+ struct Params {
+ OffloadedStmt *task_ir;
+ const CompiledSNodeStructs *compiled_structs;
+ const KernelContextAttributes *ctx_attribs;
+ std::string ti_kernel_name;
+ int task_id_in_kernel;
+ };
+
+ explicit TaskCodegen(const Params ¶ms)
+ : task_ir_(params.task_ir),
+ compiled_structs_(params.compiled_structs),
+ ctx_attribs_(params.ctx_attribs),
+ task_name_(fmt::format("{}_t{:02d}",
+ params.ti_kernel_name,
+ params.task_id_in_kernel)) {
+ allow_undefined_visitor = true;
+ invoke_default_visitor = true;
+ }
+
+ struct Result {
+ std::string source_code;
+ TaskAttributes task_attribs;
+ };
+
+ Result run() {
+ code_section_ = Section::Kernels;
+ if (task_ir_->task_type == OffloadedTaskType::serial) {
+ generate_serial_kernel(task_ir_);
+ } else if (task_ir_->task_type == OffloadedTaskType::range_for) {
+ // struct_for is automatically lowered to ranged_for for dense snodes
+ generate_range_for_kernel(task_ir_);
+ } else {
+ TI_ERROR("Unsupported offload type={} on Vulkan arch",
+ task_ir_->task_name());
+ }
+ // Headers need global information, so it has to be delayed after visiting
+ // the task IR.
+ emit_headers();
+
+ Result res;
+ for (const auto s : kAllSections) {
+ res.source_code += section_appenders_.find(s)->second.lines();
+ res.source_code += '\n';
+ }
+ res.task_attribs = std::move(task_attribs_);
+ return res;
+ }
+
+ void visit(OffloadedStmt *) override {
+ TI_ERROR("This codegen is supposed to deal with one offloaded task");
+ }
+
+ void visit(Block *stmt) override {
+ push_indent();
+ for (auto &s : stmt->statements) {
+ s->accept(this);
+ }
+ pop_indent();
+ }
+
+ void visit(ConstStmt *const_stmt) override {
+ TI_ASSERT(const_stmt->width() == 1);
+ emit("const {} {} = {};", opengl_data_type_name(const_stmt->element_type()),
+ const_stmt->raw_name(), const_stmt->val[0].stringify());
+ }
+
+ void visit(AllocaStmt *alloca) override {
+ emit("{} {} = 0;", opengl_data_type_name(alloca->element_type()),
+ alloca->raw_name());
+ }
+
+ void visit(LocalLoadStmt *stmt) override {
+ // TODO: optimize for partially vectorized load...
+ bool linear_index = true;
+ for (int i = 0; i < (int)stmt->src.size(); i++) {
+ if (stmt->src[i].offset != i) {
+ linear_index = false;
+ }
+ }
+ if (stmt->same_source() && linear_index &&
+ stmt->width() == stmt->src[0].var->width()) {
+ auto ptr = stmt->src[0].var;
+ emit("const {} {} = {};", opengl_data_type_name(stmt->element_type()),
+ stmt->raw_name(), ptr->raw_name());
+ } else {
+ TI_NOT_IMPLEMENTED;
+ }
+ }
+
+ void visit(LocalStoreStmt *stmt) override {
+ emit("{} = {};", stmt->dest->raw_name(), stmt->val->raw_name());
+ }
+
+ void visit(GetRootStmt *stmt) override {
+ // Should we assert |root_stmt_| is assigned only once?
+ root_stmt_ = stmt;
+ emit("const int {} = 0;", stmt->raw_name());
+ }
+
+ void visit(GetChStmt *stmt) override {
+ // TODO: GetChStmt -> GetComponentStmt ?
+ const auto &snode_descs = compiled_structs_->snode_descriptors;
+ auto *out_snode = stmt->output_snode;
+ TI_ASSERT(snode_descs.at(stmt->input_snode->id).get_child(stmt->chid) ==
+ out_snode);
+
+ emit("// SNode: {} -> {}", stmt->input_snode->node_type_name,
+ out_snode->node_type_name);
+ emit("const int {} = {} + {};", stmt->raw_name(),
+ stmt->input_ptr->raw_name(),
+ snode_descs.at(out_snode->id).mem_offset_in_parent_cell);
+ if (out_snode->is_place()) {
+ TI_ASSERT(ptr_to_buffers_.count(stmt) == 0);
+ ptr_to_buffers_[stmt] = BuffersEnum::Root;
+ }
+ }
+
+ void visit(SNodeLookupStmt *stmt) override {
+ // TODO: SNodeLookupStmt -> GetSNodeCellStmt ?
+ std::string parent;
+ if (stmt->input_snode) {
+ parent = stmt->input_snode->raw_name();
+ } else {
+ TI_ASSERT(root_stmt_ != nullptr);
+ parent = root_stmt_->raw_name();
+ }
+ const auto *sn = stmt->snode;
+
+ if (stmt->activate && !(sn->type == SNodeType::dense)) {
+ // Sparse SNode not supported yet.
+ TI_NOT_IMPLEMENTED;
+ }
+ const auto &snode_descs = compiled_structs_->snode_descriptors;
+ emit("// Get the cell of SNode {}", sn->node_type_name);
+ emit("const int {} = {} + ({} * {});", stmt->raw_name(), parent,
+ stmt->input_index->raw_name(), snode_descs.at(sn->id).cell_stride);
+ }
+
+ void visit(LinearizeStmt *stmt) override {
+ std::string val = "0";
+ for (int i = 0; i < (int)stmt->inputs.size(); i++) {
+ val = fmt::format("({} * {} + {})", val, stmt->strides[i],
+ stmt->inputs[i]->raw_name());
+ }
+ emit("const int {} = {};", stmt->raw_name(), val);
+ }
+
+ void visit(BitExtractStmt *stmt) override {
+ emit("const int {} = (({} >> {}) & ((1 << {}) - 1));", stmt->raw_name(),
+ stmt->input->raw_name(), stmt->bit_begin,
+ stmt->bit_end - stmt->bit_begin);
+ }
+
+ void visit(LoopIndexStmt *stmt) override {
+ const auto stmt_name = stmt->raw_name();
+    if (stmt->loop->is<OffloadedStmt>()) {
+      const auto type = stmt->loop->as<OffloadedStmt>()->task_type;
+ if (type == OffloadedTaskType::range_for) {
+ TI_ASSERT(stmt->index == 0);
+ emit("const int {} = {};", stmt_name, kLinearLoopIndexName);
+ } else {
+ TI_NOT_IMPLEMENTED;
+ }
+    } else if (stmt->loop->is<RangeForStmt>()) {
+ TI_ASSERT(stmt->index == 0);
+ emit("const int {} = {};", stmt_name, stmt->loop->raw_name());
+ } else {
+ TI_NOT_IMPLEMENTED;
+ }
+ }
+
+ void visit(GlobalStoreStmt *stmt) override {
+ TI_ASSERT(stmt->width() == 1);
+ const auto dt = stmt->val->element_type();
+ emit("{} = {};", at_buffer(stmt->dest, dt),
+ store_as_int_bits(stmt->val->raw_name(), dt));
+ }
+
+ void visit(GlobalLoadStmt *stmt) override {
+ TI_ASSERT(stmt->width() == 1);
+ auto dt = stmt->element_type();
+ const auto loaded_int = at_buffer(stmt->src, dt);
+ emit("const {} {} = {};", opengl_data_type_name(dt), stmt->raw_name(),
+ load_from_int_bits(loaded_int, dt));
+ }
+
+ void visit(ArgLoadStmt *stmt) override {
+ const auto arg_id = stmt->arg_id;
+ const auto &arg_attribs = ctx_attribs_->args()[arg_id];
+ const auto offset_in_mem = arg_attribs.offset_in_mem;
+ if (stmt->is_ptr) {
+ emit("// Pointer arg: id={} offset_in_mem={}", arg_id, offset_in_mem);
+ // Do not shift! We are indexing the buffers at byte granularity.
+ emit("const int {} = {};", stmt->raw_name(), offset_in_mem);
+ } else {
+ const auto dt = arg_attribs.dt;
+ const auto val_str = fmt::format("{}[{}]", kContextBufferName,
+ (offset_in_mem / sizeof(int32_t)));
+ emit("// Scalar arg: id={} offset_in_mem={}", arg_id, offset_in_mem);
+ emit("const {} {} = {};", opengl_data_type_name(dt), stmt->raw_name(),
+ load_from_int_bits(val_str, dt));
+ }
+ }
+
+ void visit(ReturnStmt *stmt) override {
+ // TODO: use stmt->ret_id instead of 0 as index
+ const auto &ret_attribs = ctx_attribs_->rets()[0];
+ const int index_in_buffer = ret_attribs.offset_in_mem / sizeof(int32_t);
+ emit("// Return value: offset_in_mem={}", ret_attribs.offset_in_mem);
+ emit("{}[{}] = {};", kContextBufferName, index_in_buffer,
+ store_as_int_bits(stmt->value->raw_name(), ret_attribs.dt));
+ }
+
+ void visit(GlobalTemporaryStmt *stmt) override {
+ TI_ASSERT(stmt->width() == 1);
+ const auto dt = opengl_data_type_name(stmt->element_type().ptr_removed());
+ emit("const int {} = {}", stmt->raw_name(), stmt->offset);
+ ptr_to_buffers_[stmt] = BuffersEnum::GlobalTmps;
+ }
+
+ void visit(ExternalPtrStmt *stmt) override {
+ // Used mostly for transferring data between host (e.g. numpy array) and
+ // Vulkan.
+ TI_ASSERT(stmt->width() == 1);
+ const auto linear_offset_name =
+ fmt::format("{}_linear_mem_offset_", stmt->raw_name());
+ emit("int {} = 0;", linear_offset_name);
+ emit("{{");
+ {
+ ScopedIndent s(current_appender());
+      const auto *argload = stmt->base_ptrs[0]->as<ArgLoadStmt>();
+ const int arg_id = argload->arg_id;
+ const int num_indices = stmt->indices.size();
+      std::vector<std::string> size_var_names;
+ const auto extra_args_mem_offset = ctx_attribs_->extra_args_mem_offset();
+ const auto extra_args_index_base =
+ (extra_args_mem_offset / sizeof(int32_t));
+ emit("// External ptr, extra args: mem_offset={} index_base={}",
+ extra_args_mem_offset, extra_args_index_base);
+ for (int i = 0; i < num_indices; i++) {
+ std::string var_name = fmt::format("{}_size{}_", stmt->raw_name(), i);
+ const auto extra_arg_linear_index_offset =
+ (arg_id * taichi_max_num_indices) + i;
+ const auto extra_arg_linear_index =
+ extra_args_index_base + extra_arg_linear_index_offset;
+ emit("// Extra arg: arg_id={} i={} linear_index=({} + {})={}", arg_id,
+ i, extra_args_index_base, extra_arg_linear_index_offset,
+ extra_arg_linear_index);
+ emit("const int {} = {}[{}];", var_name, kContextBufferName,
+ extra_arg_linear_index);
+ size_var_names.push_back(std::move(var_name));
+ }
+ for (int i = 0; i < num_indices; i++) {
+ emit("{} *= {};", linear_offset_name, size_var_names[i]);
+ emit("{} += {};", linear_offset_name, stmt->indices[i]->raw_name());
+ }
+ emit("// Convert index to bytes");
+ emit("{} = ({} << 2);", linear_offset_name, linear_offset_name);
+ }
+ emit("}}");
+ emit("const int {} = ({} + {});", stmt->raw_name(),
+ stmt->base_ptrs[0]->raw_name(), linear_offset_name);
+ ptr_to_buffers_[stmt] = BuffersEnum::Context;
+ }
+
+ void visit(UnaryOpStmt *stmt) override {
+ const auto dt_name = opengl_data_type_name(stmt->element_type());
+ const auto var_decl = fmt::format("const {} {}", dt_name, stmt->raw_name());
+ const auto operand_name = stmt->operand->raw_name();
+
+ if (stmt->op_type == UnaryOpType::logic_not) {
+ emit("{} = {}({} == 0);", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::neg) {
+ emit("{} = -{}({});", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::rsqrt) {
+ emit("{} = {}(inversesqrt({}));", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::sgn) {
+ emit("{} = {}(sign({}));", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::bit_not) {
+ emit("{} = ~{}({});", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::cast_value) {
+ emit("{} = {}({});", var_decl, dt_name, operand_name);
+ } else if (stmt->op_type == UnaryOpType::cast_bits) {
+ constexpr int kFloatingPoint = 0;
+ constexpr int kSignedInteger = 1;
+ constexpr int kUnsignedInteger = 2;
+
+ const auto dst_type = stmt->cast_type;
+ const auto src_type = stmt->operand->element_type();
+ auto dst_type_id = kFloatingPoint;
+ if (is_integral(dst_type)) {
+ dst_type_id = is_unsigned(dst_type) ? kUnsignedInteger : kSignedInteger;
+ }
+ auto src_type_id = kFloatingPoint;
+ if (is_integral(src_type)) {
+ src_type_id = is_unsigned(src_type) ? kUnsignedInteger : kSignedInteger;
+ }
+
+ TI_ASSERT_INFO(
+ data_type_size(dst_type) == data_type_size(src_type),
+ "bit_cast is only supported between data type with same size");
+
+ if (dst_type_id != kFloatingPoint && src_type_id != kFloatingPoint) {
+ emit("{} = {}({});", var_decl, dt_name, operand_name);
+ } else if (dst_type_id == kFloatingPoint &&
+ src_type_id == kSignedInteger) {
+ emit("{} = intBitsToFloat({});", var_decl, operand_name);
+ } else if (dst_type_id == kSignedInteger &&
+ src_type_id == kFloatingPoint) {
+ emit("{} = floatBitsToInt({});", var_decl, operand_name);
+ } else if (dst_type_id == kFloatingPoint &&
+ src_type_id == kUnsignedInteger) {
+ emit("{} = uintBitsToFloat({});", var_decl, operand_name);
+ } else if (dst_type_id == kUnsignedInteger &&
+ src_type_id == kFloatingPoint) {
+ emit("{} = floatBitsToUint({});", var_decl, operand_name);
+ } else {
+ TI_ERROR("[glsl] unsupported bit cast from {} to {}",
+ data_type_name(src_type), data_type_name(dst_type));
+ }
+ } else {
+ emit("{} = {}({});", var_decl, unary_op_type_name(stmt->op_type),
+ operand_name);
+ }
+ }
+
+ void visit(BinaryOpStmt *bin) override {
+ const auto dt_name = opengl_data_type_name(bin->element_type());
+ const auto lhs_name = bin->lhs->raw_name();
+ const auto rhs_name = bin->rhs->raw_name();
+ const auto bin_name = bin->raw_name();
+ const auto op_type = bin->op_type;
+ const auto var_decl = fmt::format("const {} {}", dt_name, bin_name);
+ if (op_type == BinaryOpType::floordiv) {
+ if (is_integral(bin->lhs->element_type()) &&
+ is_integral(bin->rhs->element_type())) {
+ emit(
+ "{} = {}(sign({}) * {} >= 0 ? abs({}) / abs({}) : "
+ "sign({}) * "
+ "(abs({}) + abs({}) - 1) / {});",
+ var_decl, dt_name, lhs_name, rhs_name, lhs_name, rhs_name, lhs_name,
+ lhs_name, rhs_name, rhs_name);
+ } else {
+ emit("{} = floor({} / {});", var_decl, lhs_name, rhs_name);
+ }
+ return;
+ }
+ if (bin->op_type == BinaryOpType::mod) {
+ // NOTE: the GLSL built-in function `mod()` is a pythonic mod: x - y *
+ // floor(x / y)
+ emit("{} = {} - {} * int({} / {});", var_decl, lhs_name, rhs_name,
+ lhs_name, rhs_name);
+ return;
+ }
+
+ const auto binop = binary_op_type_symbol(bin->op_type);
+ if (opengl::is_opengl_binary_op_infix(op_type)) {
+ if (is_comparison(op_type)) {
+ // TODO(#577): Taichi uses -1 as true due to LLVM i1.
+ emit(" {} = -{}({} {} {});", var_decl, dt_name, lhs_name, binop,
+ rhs_name);
+ } else {
+ emit("{} = {}({} {} {});", var_decl, dt_name, lhs_name, binop,
+ rhs_name);
+ }
+ } else {
+ // This is a function call
+ emit("{} = {}({}, {});", var_decl, binop, lhs_name, rhs_name);
+ }
+ }
+
+ void visit(TernaryOpStmt *tri) override {
+ TI_ASSERT(tri->op_type == TernaryOpType::select);
+ emit("const {} {} = ({}) ? ({}) : ({});",
+ opengl_data_type_name(tri->element_type()), tri->raw_name(),
+ tri->op1->raw_name(), tri->op2->raw_name(), tri->op3->raw_name());
+ }
+
+ void visit(AtomicOpStmt *stmt) override {
+ TI_ASSERT(stmt->width() == 1);
+ if (stmt->op_type != AtomicOpType::add) {
+ TI_NOT_IMPLEMENTED;
+ }
+ const auto dt = stmt->dest->element_type().ptr_removed();
+ std::string func = "atomicAdd"; // GLSL builtin
+ std::string mem = at_buffer(stmt->dest, dt);
+ if (dt->is_primitive(PrimitiveTypeID::f32)) {
+ // Buffer has to be specified in the fatomicAdd helpers.
+ const std::string buffer_name =
+ buffer_instance_name(ptr_to_buffers_.at(stmt->dest));
+ func = fmt::format("fatomicAdd_{}", buffer_name);
+ mem = vk_data_address_shifter(stmt->dest, dt);
+ } else if (!is_integral(dt)) {
+ TI_ERROR("Vulkan only supports 32-bit atomic data types");
+ }
+ // const dt stmt = atomicAdd(mem, val);
+ emit("const {} {} = {}({}, {});", opengl_data_type_name(dt),
+ stmt->raw_name(), func, mem, stmt->val->raw_name());
+ }
+
+ void visit(IfStmt *if_stmt) override {
+ emit("if ({} != 0) {{", if_stmt->cond->raw_name());
+ if (if_stmt->true_statements) {
+ if_stmt->true_statements->accept(this);
+ }
+ emit("}} else {{");
+ if (if_stmt->false_statements) {
+ if_stmt->false_statements->accept(this);
+ }
+ emit("}}");
+ }
+
+ void visit(RangeForStmt *for_stmt) override {
+ TI_ASSERT(for_stmt->width() == 1);
+ auto loop_var_name = for_stmt->raw_name();
+ if (!for_stmt->reversed) {
+ emit("for (int {}_ = {}; {}_ < {}; {}_ = {}_ + {}) {{", loop_var_name,
+ for_stmt->begin->raw_name(), loop_var_name,
+ for_stmt->end->raw_name(), loop_var_name, loop_var_name, 1);
+ emit(" int {} = {}_;", loop_var_name, loop_var_name);
+ } else {
+ // reversed for loop
+ emit("for (int {}_ = {} - 1; {}_ >= {}; {}_ = {}_ - {}) {{",
+ loop_var_name, for_stmt->end->raw_name(), loop_var_name,
+ for_stmt->begin->raw_name(), loop_var_name, loop_var_name, 1);
+ emit(" int {} = {}_;", loop_var_name, loop_var_name);
+ }
+ for_stmt->body->accept(this);
+ emit("}}");
+ }
+
+ void visit(WhileStmt *stmt) override {
+ emit("while (true) {{");
+ stmt->body->accept(this);
+ emit("}}");
+ }
+
+ void visit(WhileControlStmt *stmt) override {
+ emit("if ({} == 0) break;", stmt->cond->raw_name());
+ }
+
+ void visit(ContinueStmt *stmt) override {
+ if (stmt->as_return()) {
+ emit("return;");
+ } else {
+ emit("continue;");
+ }
+ }
+
+ private:
+ void emit_headers() {
+ SectionGuard sg(this, Section::Headers);
+
+ emit("#version 450");
+ emit("layout(local_size_x={}, local_size_y=1, local_size_z=1) in;",
+ task_attribs_.advisory_num_threads_per_group);
+ emit("");
+ for (const auto &bb : task_attribs_.buffer_binds) {
+ // e.g.
+ // layout(std430, binding=0) buffer Root { int root_buffer[]; };
+ emit("layout(std430, binding={}) buffer {} {{ int {}[]; }};", bb.binding,
+ TaskAttributes::buffers_name(bb.type),
+ buffer_instance_name(bb.type));
+ }
+ emit("");
+ emit("// Helpers");
+ current_appender().append_raw(kVulkanAtomicsSourceCode);
+ }
+
+ void generate_serial_kernel(OffloadedStmt *stmt) {
+ task_attribs_.name = task_name_;
+ task_attribs_.task_type = OffloadedTaskType::serial;
+ task_attribs_.buffer_binds = get_common_buffer_binds();
+ task_attribs_.advisory_total_num_threads = 1;
+ task_attribs_.advisory_num_threads_per_group = 1;
+
+ const auto func_name = single_work_func_name();
+ // The computation for a single work is wrapped inside a function, so that
+ // we can do grid-strided loop.
+ emit_single_work_func_def(func_name, stmt->body.get());
+ // The actual compute kernel entry point.
+ emit("void main() {{");
+ {
+ ScopedIndent s(current_appender());
+ emit("// serial");
+ emit("if ({} > 0) return;", kGlobalInvocationIDName);
+
+ emit_call_single_work_func(func_name, /*loop_index_expr=*/"0");
+ }
+ // Close kernel
+ emit("}}\n");
+ }
+
+ void generate_range_for_kernel(OffloadedStmt *stmt) {
+ task_attribs_.name = task_name_;
+ task_attribs_.task_type = OffloadedTaskType::range_for;
+ task_attribs_.buffer_binds = get_common_buffer_binds();
+
+ task_attribs_.range_for_attribs = TaskAttributes::RangeForAttributes();
+ auto &range_for_attribs = task_attribs_.range_for_attribs.value();
+ range_for_attribs.const_begin = stmt->const_begin;
+ range_for_attribs.const_end = stmt->const_end;
+ range_for_attribs.begin =
+ (stmt->const_begin ? stmt->begin_value : stmt->begin_offset);
+ range_for_attribs.end =
+ (stmt->const_end ? stmt->end_value : stmt->end_offset);
+
+ const auto func_name = single_work_func_name();
+ emit_single_work_func_def(func_name, stmt->body.get());
+
+ emit("void main() {{");
+ push_indent();
+ const std::string total_elems_name("total_elems");
+ std::string begin_expr;
+ if (range_for_attribs.const_range()) {
+ const int num_elems = range_for_attribs.end - range_for_attribs.begin;
+ begin_expr = std::to_string(stmt->begin_value);
+ emit("// range_for, range known at compile time");
+ emit("const int {} = {};", total_elems_name, num_elems);
+ task_attribs_.advisory_total_num_threads = num_elems;
+ } else {
+ TI_NOT_IMPLEMENTED;
+ }
+ // begin_ = thread_id + begin_expr
+ emit("const int begin_ = {} + {};", kGlobalInvocationIDName, begin_expr);
+ // end_ = total_elems + begin_expr
+ emit("const int end_ = {} + {};", total_elems_name, begin_expr);
+ // For now, |total_invocs_name| is equal to |total_elems|. Once we support
+ // dynamic range, they will be different.
+ const std::string total_invocs_name = "total_invocs";
+ // https://www.khronos.org/opengl/wiki/Compute_Shader#Inputs
+ emit("const int {} = int(gl_NumWorkGroups.x * gl_WorkGroupSize.x);",
+ total_invocs_name);
+ // grid-strided loop
+ emit("for (int ii = begin_; ii < end_; ii += {}) {{", total_invocs_name);
+ {
+ ScopedIndent s2(current_appender());
+ emit_call_single_work_func(func_name, /*loop_index_expr=*/"ii");
+ }
+ emit("}}"); // closes for loop
+
+ pop_indent();
+ // Close kernel
+ emit("}}\n");
+ // TODO: runtime needs to verify if block_dim is feasible
+ task_attribs_.advisory_num_threads_per_group = stmt->block_dim;
+ }
+
+ void emit_single_work_func_def(const std::string &func_name,
+
+ Block *func_ir) {
+ emit("void {}(", func_name);
+ emit(" const int {}) {{", kLinearLoopIndexName);
+ // We do not need additional indentation, because |func_ir| itself is a
+ // block, which will be indented automatically.
+ func_ir->accept(this);
+ emit("}}\n"); // closes this function
+ }
+
+ void emit_call_single_work_func(const std::string &func_name,
+
+ const std::string &loop_index_expr) {
+ emit("{}({});", func_name, loop_index_expr);
+ }
+
+ std::string at_buffer(const Stmt *ptr, DataType dt) const {
+ const std::string buffer_name =
+ buffer_instance_name(ptr_to_buffers_.at(ptr));
+ return fmt::format("{}[{}]", buffer_name, vk_data_address_shifter(ptr, dt));
+ }
+
+ std::string single_work_func_name() const {
+ return task_name_ + "_func";
+ }
+
+  std::vector<TaskAttributes::BufferBind> get_common_buffer_binds() const {
+    std::vector<TaskAttributes::BufferBind> result;
+ int binding = 0;
+ result.push_back({BuffersEnum::Root, binding++});
+ result.push_back({BuffersEnum::GlobalTmps, binding++});
+ if (!ctx_attribs_->empty()) {
+ result.push_back({BuffersEnum::Context, binding++});
+ }
+ return result;
+ }
+
+ class SectionGuard {
+ public:
+ SectionGuard(TaskCodegen *tcg, Section new_sec)
+ : tcg_(tcg), saved_(tcg->code_section_) {
+ tcg_->code_section_ = new_sec;
+ }
+
+ ~SectionGuard() {
+ tcg_->code_section_ = saved_;
+ }
+
+ private:
+ TaskCodegen *const tcg_;
+ const Section saved_;
+ };
+
+ friend class SectionGuard;
+
+  template <typename... Args>
+  void emit(std::string f, Args &&... args) {
+    // TI_INFO(f, args...);
+    current_appender().append(std::move(f), std::forward<Args>(args)...);
+  }
+
+ void push_indent() {
+ current_appender().push_indent();
+ }
+
+ void pop_indent() {
+ current_appender().pop_indent();
+ }
+
+  LineAppender &current_appender() {
+ return section_appenders_[code_section_];
+ }
+
+ OffloadedStmt *const task_ir_; // not owned
+ const CompiledSNodeStructs *const compiled_structs_; // not owned
+ const KernelContextAttributes *const ctx_attribs_; // not owned
+ const std::string task_name_;
+
+ TaskAttributes task_attribs_;
+ GetRootStmt *root_stmt_{nullptr};
+  std::unordered_map<const Stmt *, BuffersEnum> ptr_to_buffers_;
+ Section code_section_{Section::Kernels};
+  std::unordered_map<Section, LineAppender> section_appenders_;
+};
+
+class KernelCodegen {
+ public:
+ struct Params {
+ std::string ti_kernel_name;
+ Kernel *kernel;
+ const CompiledSNodeStructs *compiled_structs;
+ };
+
+  explicit KernelCodegen(const Params &params)
+ : params_(params), ctx_attribs_(*params.kernel) {
+ }
+
+ using Result = VkRuntime::RegisterParams;
+
+ Result run() {
+ Result res;
+ auto &kernel_attribs = res.kernel_attribs;
+    auto *root = params_.kernel->ir->as<Block>();
+ auto &tasks = root->statements;
+ for (int i = 0; i < tasks.size(); ++i) {
+ TaskCodegen::Params tp;
+      tp.task_ir = tasks[i]->as<OffloadedStmt>();
+ tp.task_id_in_kernel = i;
+ tp.compiled_structs = params_.compiled_structs;
+ tp.ctx_attribs = &ctx_attribs_;
+ tp.ti_kernel_name = params_.ti_kernel_name;
+
+ TaskCodegen cgen(tp);
+ auto task_res = cgen.run();
+ kernel_attribs.tasks_attribs.push_back(std::move(task_res.task_attribs));
+ res.task_glsl_source_codes.push_back(std::move(task_res.source_code));
+ }
+ kernel_attribs.ctx_attribs = std::move(ctx_attribs_);
+ kernel_attribs.name = params_.ti_kernel_name;
+ kernel_attribs.is_jit_evaluator = params_.kernel->is_evaluator;
+ return res;
+ }
+
+ private:
+ Params params_;
+ KernelContextAttributes ctx_attribs_;
+};
+
+} // namespace
+
+void lower(Kernel *kernel) {
+ auto &config = kernel->program->config;
+ config.demote_dense_struct_fors = true;
+ irpass::compile_to_executable(kernel->ir.get(), config, kernel,
+ /*vectorize=*/false, kernel->grad,
+ /*ad_use_stack=*/false, config.print_ir,
+ /*lower_global_access=*/true,
+ /*make_thread_local=*/false);
+}
+
+FunctionType compile_to_executable(Kernel *kernel,
+ const CompiledSNodeStructs *compiled_structs,
+ VkRuntime *runtime) {
+ const auto id = Program::get_kernel_id();
+ const auto taichi_kernel_name(fmt::format("{}_k{:04d}_vk", kernel->name, id));
+ TI_INFO("VK codegen for Taichi kernel={}", taichi_kernel_name);
+ KernelCodegen::Params params;
+ params.ti_kernel_name = taichi_kernel_name;
+ params.kernel = kernel;
+ params.compiled_structs = compiled_structs;
+ KernelCodegen codegen(params);
+ auto res = codegen.run();
+ auto handle = runtime->register_taichi_kernel(std::move(res));
+ return [runtime, handle, taichi_kernel_name](Context &ctx) {
+ runtime->launch_kernel(handle, &ctx);
+ };
+}
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/codegen_vulkan.h b/taichi/backends/vulkan/codegen_vulkan.h
new file mode 100644
index 000000000..64210d95e
--- /dev/null
+++ b/taichi/backends/vulkan/codegen_vulkan.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "taichi/lang_util.h"
+
+#include "taichi/backends/vulkan/snode_struct_compiler.h"
+
+namespace taichi {
+namespace lang {
+
+class Kernel;
+
+namespace vulkan {
+
+class VkRuntime;
+
+void lower(Kernel *kernel);
+
+// These ASTs must have already been lowered at the CHI level.
+FunctionType compile_to_executable(Kernel *kernel,
+ const CompiledSNodeStructs *compiled_structs,
+ VkRuntime *runtime);
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/shaders/atomics.glsl.h b/taichi/backends/vulkan/shaders/atomics.glsl.h
new file mode 100644
index 000000000..1c183b6f4
--- /dev/null
+++ b/taichi/backends/vulkan/shaders/atomics.glsl.h
@@ -0,0 +1,44 @@
+// clang-format on
+#include "taichi/backends/vulkan/shaders/prologue.h"
+
+#ifndef TI_INSIDE_VULKAN_CODEGEN
+static_assert(false, "do not include");
+#endif // TI_INSIDE_VULKAN_CODEGEN
+
+#define VULKAN_BEGIN_CODE_DEF constexpr auto kVulkanAtomicsSourceCode =
+#define VULKAN_END_CODE_DEF ;
+
+// clang-format off
+VULKAN_BEGIN_CODE_DEF
+STR(
+// TODO: don't duplicate, pass in pointer
+float fatomicAdd_root_buffer(int addr, float data) {
+ int old_val = 0;
+ int new_val = 0;
+ int cas_val = 0;
+ int ok = 0;
+ while (ok == 0) {
+ old_val = root_buffer[addr];
+ new_val = floatBitsToInt(intBitsToFloat(old_val) + data);
+ cas_val = atomicCompSwap(root_buffer[addr], old_val, new_val);
+ ok = int(cas_val == old_val);
+ }
+ return intBitsToFloat(old_val);
+}
+
+float fatomicAdd_global_tmps_buffer(int addr, float data) {
+ int old_val = 0;
+ int new_val = 0;
+ int cas_val = 0;
+ int ok = 0;
+ while (ok == 0) {
+ old_val = global_tmps_buffer[addr];
+ new_val = floatBitsToInt(intBitsToFloat(old_val) + data);
+ cas_val = atomicCompSwap(global_tmps_buffer[addr], old_val, new_val);
+ ok = int(cas_val == old_val);
+ }
+ return intBitsToFloat(old_val);
+}
+)
+VULKAN_END_CODE_DEF
+// clang-format on
diff --git a/taichi/backends/vulkan/shaders/epilogue.h b/taichi/backends/vulkan/shaders/epilogue.h
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/taichi/backends/vulkan/shaders/epilogue.h
@@ -0,0 +1 @@
+
diff --git a/taichi/backends/vulkan/shaders/prologue.h b/taichi/backends/vulkan/shaders/prologue.h
new file mode 100644
index 000000000..bbacdf2ef
--- /dev/null
+++ b/taichi/backends/vulkan/shaders/prologue.h
@@ -0,0 +1,28 @@
+#ifdef TI_INSIDE_VULKAN_CODEGEN
+
+#include "taichi/util/macros.h"
+
+#else
+
+#define STR(...) __VA_ARGS__
+
+#define inout
+
+// GLSL builtin stubs
+int floatBitsToInt(float f) {
+  return *reinterpret_cast<int *>(&f);
+}
+
+float intBitsToFloat(int i) {
+  return *reinterpret_cast<float *>(&i);
+}
+
+int atomicCompSwap(int &mem, int compare, int data) {
+ const int old = mem;
+ if (mem == compare) {
+ mem = data;
+ }
+ return old;
+}
+
+#endif // TI_INSIDE_VULKAN_CODEGEN
diff --git a/taichi/backends/vulkan/snode_struct_compiler.cpp b/taichi/backends/vulkan/snode_struct_compiler.cpp
new file mode 100644
index 000000000..e586cd582
--- /dev/null
+++ b/taichi/backends/vulkan/snode_struct_compiler.cpp
@@ -0,0 +1,90 @@
+#include "taichi/backends/vulkan/snode_struct_compiler.h"
+
+#include "taichi/backends/vulkan/data_type_utils.h"
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+namespace {
+
+class StructCompiler {
+ public:
+ CompiledSNodeStructs run(const SNode &root) {
+ TI_ASSERT(root.type == SNodeType::root);
+
+ CompiledSNodeStructs result;
+ result.root_size = compute_snode_size(&root);
+ result.snode_descriptors = std::move(snode_descriptors_);
+ TI_INFO("Vulkan RootBuffer size={}", result.root_size);
+ return result;
+ }
+
+ private:
+ std::size_t compute_snode_size(const SNode *sn) {
+ const bool is_place = sn->is_place();
+
+ SNodeDescriptor sn_desc;
+ sn_desc.snode = sn;
+ if (is_place) {
+ sn_desc.cell_stride = vk_data_type_size(sn->dt);
+ sn_desc.container_stride = sn_desc.cell_stride;
+ } else {
+ std::size_t cell_stride = 0;
+ for (const auto &ch : sn->ch) {
+ const auto child_offset = cell_stride;
+ const auto *ch_snode = ch.get();
+ cell_stride += compute_snode_size(ch_snode);
+ snode_descriptors_.find(ch_snode->id)
+ ->second.mem_offset_in_parent_cell = child_offset;
+ }
+ sn_desc.cell_stride = cell_stride;
+ sn_desc.container_stride =
+ cell_stride * sn_desc.cells_per_container_pot();
+ }
+
+ sn_desc.total_num_cells_from_root = 1;
+ for (const auto &e : sn->extractors) {
+ // Note that the extractors are set in two places:
+ // 1. When a new SNode is first defined
+ // 2. StructCompiler::infer_snode_properties()
+ // The second step is the finalized result.
+ sn_desc.total_num_cells_from_root *= e.num_elements;
+ }
+
+ TI_INFO("SNodeDescriptor");
+ TI_INFO("* snode={}", sn_desc.snode->id);
+ TI_INFO("* type={} (is_place={})", sn_desc.snode->node_type_name, is_place);
+ TI_INFO("* cell_stride={}", sn_desc.cell_stride);
+ TI_INFO("* cells_per_container_pot={}", sn_desc.cells_per_container_pot());
+ TI_INFO("* container_stride={}", sn_desc.container_stride);
+ TI_INFO("* total_num_cells_from_root={}",
+ sn_desc.total_num_cells_from_root);
+ TI_INFO("");
+
+ TI_ASSERT(snode_descriptors_.find(sn->id) == snode_descriptors_.end());
+ snode_descriptors_[sn->id] = sn_desc;
+ return sn_desc.container_stride;
+ }
+
+ SNodeDescriptorsMap snode_descriptors_;
+};
+
+} // namespace
+
+int SNodeDescriptor::cells_per_container_pot() const {
+ // For root, |snode->n| is 0.
+ const auto ty = snode->type;
+ if (ty == SNodeType::root || ty == SNodeType::place) {
+ return 1;
+ }
+ return snode->n;
+}
+
+CompiledSNodeStructs compile_snode_structs(const SNode &root) {
+ StructCompiler compiler;
+ return compiler.run(root);
+}
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/snode_struct_compiler.h b/taichi/backends/vulkan/snode_struct_compiler.h
new file mode 100644
index 000000000..426c96ee0
--- /dev/null
+++ b/taichi/backends/vulkan/snode_struct_compiler.h
@@ -0,0 +1,53 @@
+// Codegen for the hierarchical data structure
+#pragma once
+
+#include <unordered_map>
+
+#include "taichi/ir/snode.h"
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+
+struct SNodeDescriptor {
+ const SNode *snode = nullptr;
+ // Stride (bytes) of a single cell.
+ int cell_stride = 0;
+
+ // Number of cells per container, padded to Power of Two (pot).
+ int cells_per_container_pot() const;
+
+ // Bytes of a single container.
+ int container_stride = 0;
+
+ // Total number of CELLS of this SNode, NOT padded to PoT.
+ // For example, for a layout of
+ // ti.root
+ // .dense(ti.ij, (3, 2)) // S1
+ // .dense(ti.ij, (5, 3)) // S2
+ // |total_num_cells_from_root| for S2 is 3x2x5x3 = 90. That is, S2 has a total
+ // of 90 cells. Note that the number of S2 (container) itself is 3x2=6!
+ int total_num_cells_from_root = 0;
+ // An SNode can have multiple number of components, where each component
+ // starts at a fixed offset in its parent cell's memory.
+ int mem_offset_in_parent_cell = 0;
+
+ SNode *get_child(int ch_i) const {
+ return snode->ch[ch_i].get();
+ }
+};
+
+using SNodeDescriptorsMap = std::unordered_map<int, SNodeDescriptor>;
+
+struct CompiledSNodeStructs {
+ // Root buffer size in bytes.
+ size_t root_size;
+ // Map from SNode ID to its descriptor.
+ SNodeDescriptorsMap snode_descriptors;
+};
+
+CompiledSNodeStructs compile_snode_structs(const SNode &root);
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/vulkan_simple_memory_pool.cpp b/taichi/backends/vulkan/vulkan_simple_memory_pool.cpp
new file mode 100644
index 000000000..72ee792e5
--- /dev/null
+++ b/taichi/backends/vulkan/vulkan_simple_memory_pool.cpp
@@ -0,0 +1,143 @@
+#include "taichi/backends/vulkan/vulkan_simple_memory_pool.h"
+
+#include "taichi/math/arithmetic.h"
+#include "taichi/backends/vulkan/vulkan_common.h"
+#include "taichi/common/logging.h"
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+
+namespace {
+
+static constexpr VkDeviceSize kAlignment = 256;
+
+VkDeviceSize roundup_aligned(VkDeviceSize size) {
+ return iroundup(size, kAlignment);
+}
+
+} // namespace
+
+VkBufferWithMemory::VkBufferWithMemory(VkDevice device,
+ VkBuffer buffer,
+ VkDeviceMemory mem,
+ VkDeviceSize size,
+ VkDeviceSize offset)
+ : device_(device),
+ buffer_(buffer),
+ backing_memory_(mem),
+ size_(size),
+ offset_in_mem_(offset) {
+ TI_ASSERT(buffer_ != VK_NULL_HANDLE);
+ TI_ASSERT(size_ > 0);
+ TI_ASSERT(backing_memory_ != VK_NULL_HANDLE);
+}
+
+VkBufferWithMemory::~VkBufferWithMemory() {
+ if (buffer_ != VK_NULL_HANDLE) {
+ vkDestroyBuffer(device_, buffer_, kNoVkAllocCallbacks);
+ }
+}
+
+LinearVkMemoryPool::LinearVkMemoryPool(const Params &params,
+ VkDeviceMemory mem,
+ uint32_t mti)
+ : device_(params.device),
+ memory_(mem),
+ memory_type_index_(mti),
+ compute_queue_family_index_(params.compute_queue_family_index),
+ buffer_creation_template_(params.buffer_creation_template),
+ pool_size_(params.pool_size),
+ next_(0) {
+ buffer_creation_template_.size = 0;
+ buffer_creation_template_.queueFamilyIndexCount = 1;
+ buffer_creation_template_.pQueueFamilyIndices = &compute_queue_family_index_;
+}
+
+LinearVkMemoryPool::~LinearVkMemoryPool() {
+ if (memory_ != VK_NULL_HANDLE) {
+ vkFreeMemory(device_, memory_, kNoVkAllocCallbacks);
+ }
+}
+
+// static
+std::unique_ptr<LinearVkMemoryPool> LinearVkMemoryPool::try_make(
+ Params params) {
+ params.pool_size = roundup_aligned(params.pool_size);
+
+ VkMemoryAllocateInfo alloc_info{};
+ alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+ alloc_info.allocationSize = params.pool_size;
+ const auto mem_type_index = find_memory_type(params);
+ if (!mem_type_index.has_value()) {
+ return nullptr;
+ }
+ alloc_info.memoryTypeIndex = mem_type_index.value();
+ VkDeviceMemory mem;
+ if (vkAllocateMemory(params.device, &alloc_info, kNoVkAllocCallbacks, &mem) !=
+ VK_SUCCESS) {
+ return nullptr;
+ }
+  return std::make_unique<LinearVkMemoryPool>(params, mem,
+                                              alloc_info.memoryTypeIndex);
+}
+
+std::unique_ptr<VkBufferWithMemory> LinearVkMemoryPool::alloc_and_bind(
+ VkDeviceSize buf_size) {
+ buf_size = roundup_aligned(buf_size);
+ if (pool_size_ <= (next_ + buf_size)) {
+ TI_WARN("Vulkan memory pool exhausted, max size={}", pool_size_);
+ return nullptr;
+ }
+
+ VkBuffer buffer;
+ buffer_creation_template_.size = buf_size;
+ BAIL_ON_VK_BAD_RESULT(vkCreateBuffer(device_, &buffer_creation_template_,
+ kNoVkAllocCallbacks, &buffer),
+ "failed to create buffer");
+ buffer_creation_template_.size = 0; // reset
+ const auto offset_in_mem = next_;
+ next_ += buf_size;
+ BAIL_ON_VK_BAD_RESULT(
+ vkBindBufferMemory(device_, buffer, memory_, offset_in_mem),
+ "failed to bind buffer to memory");
+
+ VkMemoryRequirements mem_requirements;
+ vkGetBufferMemoryRequirements(device_, buffer, &mem_requirements);
+ TI_ASSERT(mem_requirements.memoryTypeBits & (1 << memory_type_index_));
+ TI_ASSERT_INFO((buf_size % mem_requirements.alignment) == 0,
+ "buf_size={} required alignment={}", buf_size,
+ mem_requirements.alignment);
+  return std::make_unique<VkBufferWithMemory>(device_, buffer, memory_,
+                                              buf_size, offset_in_mem);
+}
+
+// static
+std::optional<uint32_t> LinearVkMemoryPool::find_memory_type(
+    const Params &params) {
+ VkPhysicalDeviceMemoryProperties mem_properties;
+ vkGetPhysicalDeviceMemoryProperties(params.physical_device, &mem_properties);
+ auto satisfies = [&](int i) -> bool {
+ const auto &mem_type = mem_properties.memoryTypes[i];
+ if ((mem_type.propertyFlags & params.required_properties) !=
+ params.required_properties) {
+ return false;
+ }
+ if (mem_properties.memoryHeaps[mem_type.heapIndex].size <=
+ params.pool_size) {
+ return false;
+ }
+ return true;
+ };
+
+ for (int i = 0; i < mem_properties.memoryTypeCount; ++i) {
+ if (satisfies(i)) {
+ return i;
+ }
+ }
+ return std::nullopt;
+}
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/vulkan_simple_memory_pool.h b/taichi/backends/vulkan/vulkan_simple_memory_pool.h
new file mode 100644
index 000000000..9e1023f0a
--- /dev/null
+++ b/taichi/backends/vulkan/vulkan_simple_memory_pool.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+
+class VkBufferWithMemory {
+ public:
+ VkBufferWithMemory(VkDevice device,
+ VkBuffer buffer,
+ VkDeviceMemory mem,
+ VkDeviceSize size,
+ VkDeviceSize offset);
+
+ // Just use std::unique_ptr to save all the trouble from crafting move ctors
+ // on our own
+ VkBufferWithMemory(const VkBufferWithMemory &) = delete;
+ VkBufferWithMemory &operator=(const VkBufferWithMemory &) = delete;
+ VkBufferWithMemory(VkBufferWithMemory &&) = delete;
+ VkBufferWithMemory &operator=(VkBufferWithMemory &&) = delete;
+
+ ~VkBufferWithMemory();
+
+ VkBuffer buffer() const {
+ return buffer_;
+ }
+
+ VkDeviceSize size() const {
+ return size_;
+ }
+
+ VkDeviceSize offset_in_mem() const {
+ return offset_in_mem_;
+ }
+
+ class Mapped {
+ public:
+ explicit Mapped(VkBufferWithMemory *buf) : buf_(buf), data_(nullptr) {
+ vkMapMemory(buf_->device_, buf_->backing_memory_, buf_->offset_in_mem(),
+ buf_->size(), /*flags=*/0, &data_);
+ }
+
+ ~Mapped() {
+ vkUnmapMemory(buf_->device_, buf_->backing_memory_);
+ }
+
+ void *data() const {
+ return data_;
+ }
+
+ private:
+ VkBufferWithMemory *const buf_; // not owned
+ void *data_;
+ };
+
+ Mapped map_mem() {
+ return Mapped(this);
+ }
+
+ private:
+ friend class Mapped;
+
+ VkDevice device_{VK_NULL_HANDLE};
+ VkBuffer buffer_{VK_NULL_HANDLE};
+ VkDeviceMemory backing_memory_{VK_NULL_HANDLE};
+ VkDeviceSize size_{0};
+ VkDeviceSize offset_in_mem_{0};
+};
+
+// TODO: Use
+// https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/custom_memory_pools.html
+class LinearVkMemoryPool {
+ public:
+ struct Params {
+ VkPhysicalDevice physical_device{VK_NULL_HANDLE};
+ VkDevice device{VK_NULL_HANDLE};
+ VkMemoryPropertyFlags required_properties;
+ VkDeviceSize pool_size{0};
+ uint32_t compute_queue_family_index{0};
+ VkBufferCreateInfo buffer_creation_template{};
+ };
+
+  LinearVkMemoryPool(const Params &params, VkDeviceMemory mem, uint32_t mti);
+
+ ~LinearVkMemoryPool();
+
+  static std::unique_ptr<LinearVkMemoryPool> try_make(Params params);
+
+  std::unique_ptr<VkBufferWithMemory> alloc_and_bind(VkDeviceSize buf_size);
+
+ private:
+  static std::optional<uint32_t> find_memory_type(const Params &params);
+
+ VkDevice device_{VK_NULL_HANDLE}; // not owned
+ VkDeviceMemory memory_{VK_NULL_HANDLE};
+ uint32_t memory_type_index_{0};
+ uint32_t compute_queue_family_index_{0};
+ VkBufferCreateInfo buffer_creation_template_{};
+ VkDeviceSize pool_size_{0};
+ VkDeviceSize next_{0};
+};
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/vulkan_utils.cpp b/taichi/backends/vulkan/vulkan_utils.cpp
new file mode 100644
index 000000000..d0fc7d60f
--- /dev/null
+++ b/taichi/backends/vulkan/vulkan_utils.cpp
@@ -0,0 +1,54 @@
+#include "taichi/backends/vulkan/vulkan_utils.h"
+
+#include
+
+namespace taichi {
+namespace lang {
+namespace vulkan {
+
+std::vector<VkExtensionProperties> GetInstanceExtensionProperties() {
+ constexpr char *kNoLayerName = nullptr;
+ uint32_t count = 0;
+ vkEnumerateInstanceExtensionProperties(kNoLayerName, &count, nullptr);
+  std::vector<VkExtensionProperties> extensions(count);
+ vkEnumerateInstanceExtensionProperties(kNoLayerName, &count,
+ extensions.data());
+ return extensions;
+}
+
+std::vector<VkExtensionProperties> GetDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice) {
+ constexpr char *kNoLayerName = nullptr;
+ uint32_t count = 0;
+ vkEnumerateDeviceExtensionProperties(physicalDevice, kNoLayerName, &count,
+ nullptr);
+  std::vector<VkExtensionProperties> extensions(count);
+ vkEnumerateDeviceExtensionProperties(physicalDevice, kNoLayerName, &count,
+ extensions.data());
+ return extensions;
+}
+
+GlslToSpirvCompiler::GlslToSpirvCompiler(const ErrorHandler &err_handler)
+ : err_handler_(err_handler) {
+ opts_.SetTargetEnvironment(shaderc_target_env_vulkan,
+ VulkanEnvSettings::kShadercEnvVersion());
+ opts_.SetOptimizationLevel(shaderc_optimization_level_performance);
+}
+
+std::optional<GlslToSpirvCompiler::SpirvBinary> GlslToSpirvCompiler::compile(
+ const std::string &glsl_src,
+ const std::string &shader_name) {
+ auto spv_result =
+ compiler_.CompileGlslToSpv(glsl_src, shaderc_glsl_default_compute_shader,
+ /*input_file_name=*/shader_name.c_str(),
+ /*entry_point_name=*/"main", opts_);
+ if (spv_result.GetCompilationStatus() != shaderc_compilation_status_success) {
+ err_handler_(glsl_src, shader_name, spv_result.GetErrorMessage());
+ return std::nullopt;
+ }
+ return SpirvBinary(spv_result.begin(), spv_result.end());
+}
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/vulkan/vulkan_utils.h b/taichi/backends/vulkan/vulkan_utils.h
new file mode 100644
index 000000000..6f01e9f1f
--- /dev/null
+++ b/taichi/backends/vulkan/vulkan_utils.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <vulkan/vulkan.h>
+
+#include <functional>
+#include <optional>
+#include <shaderc/shaderc.hpp>
+#include <string>
+#include <vector>
+
+namespace taichi {
+namespace lang {
+
+namespace vulkan {
+
+std::vector<VkExtensionProperties> GetInstanceExtensionProperties();
+
+std::vector<VkExtensionProperties> GetDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice);
+
+class VulkanEnvSettings {
+ public:
+ static constexpr uint32_t kApiVersion() {
+ return VK_API_VERSION_1_0;
+ }
+
+ static constexpr shaderc_env_version kShadercEnvVersion() {
+ return shaderc_env_version_vulkan_1_0;
+ }
+};
+
+class GlslToSpirvCompiler {
+ public:
+  using SpirvBinary = std::vector<uint32_t>;
+  using ErrorHandler = std::function<void(const std::string &, const std::string &, const std::string &)>;
+
+ explicit GlslToSpirvCompiler(const ErrorHandler &err_handler);
+
+ std::optional compile(const std::string &glsl_src,
+ const std::string &shader_name);
+
+ private:
+ shaderc::CompileOptions opts_;
+ shaderc::Compiler compiler_;
+ ErrorHandler err_handler_{nullptr};
+};
+
+} // namespace vulkan
+} // namespace lang
+} // namespace taichi
diff --git a/taichi/backends/wasm/codegen_wasm.cpp b/taichi/backends/wasm/codegen_wasm.cpp
index 1649f11e8..cf5d68224 100644
--- a/taichi/backends/wasm/codegen_wasm.cpp
+++ b/taichi/backends/wasm/codegen_wasm.cpp
@@ -13,6 +13,12 @@
namespace taichi {
namespace lang {
+namespace {
+constexpr std::array<const char *, 3> kPreloadedFuncNames = {
+ "wasm_materialize", "wasm_set_kernel_parameter_i32",
+ "wasm_set_kernel_parameter_f32"};
+}
+
class CodeGenLLVMWASM : public CodeGenLLVM {
public:
using IRVisitor::visit;
@@ -173,14 +179,16 @@ class CodeGenLLVMWASM : public CodeGenLLVM {
ir->accept(this);
finalize_taichi_kernel_function();
- auto wasm_materialize_name = "wasm_materialize";
-
// compile_module_to_executable
// only keep the current func
TaichiLLVMContext::eliminate_unused_functions(
- module.get(), [&](std::string func_name) {
- return offloaded_task_name == func_name ||
- wasm_materialize_name == func_name;
+ module.get(), [offloaded_task_name](const std::string &func_name) {
+ for (auto &name : kPreloadedFuncNames) {
+ if (std::string(name) == func_name) {
+ return true;
+ }
+ }
+ return func_name == offloaded_task_name;
});
tlctx->add_module(std::move(module));
auto kernel_symbol = tlctx->lookup_function_pointer(offloaded_task_name);
@@ -199,9 +207,6 @@ FunctionType CodeGenWASM::codegen() {
std::unique_ptr CodeGenWASM::modulegen(
std::unique_ptr &&module) {
- /*
- TODO: move wasm_materialize to dump process in AOT.
- */
bool init_flag = module == nullptr;
std::vector name_list;
@@ -211,8 +216,11 @@ std::unique_ptr CodeGenWASM::modulegen(
gen->emit_to_module();
gen->finalize_taichi_kernel_function();
+ // TODO: move the following functions to dump process in AOT.
if (init_flag) {
- name_list.emplace_back("wasm_materialize");
+ for (auto &name : kPreloadedFuncNames) {
+ name_list.emplace_back(name);
+ }
}
gen->tlctx->jit->global_optimize_module(gen->module.get());
diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h
index ec2b99933..a02f3bdc2 100644
--- a/taichi/ir/frontend_ir.h
+++ b/taichi/ir/frontend_ir.h
@@ -375,6 +375,7 @@ class GlobalVariableExpression : public Expression {
public:
Identifier ident;
DataType dt;
+ std::string name;
SNode *snode;
bool has_ambient;
TypedConstant ambient_value;
diff --git a/taichi/ir/type_utils.cpp b/taichi/ir/type_utils.cpp
index dffe9ff99..7ba723994 100644
--- a/taichi/ir/type_utils.cpp
+++ b/taichi/ir/type_utils.cpp
@@ -25,17 +25,11 @@ std::string data_type_format(DataType dt) {
} else if (dt->is_primitive(PrimitiveTypeID::u32)) {
return "%u";
} else if (dt->is_primitive(PrimitiveTypeID::i64)) {
-#if defined(TI_PLATFORM_UNIX)
+ // Use %lld on Windows.
+ // Discussion: https://github.com/taichi-dev/taichi/issues/2522
return "%lld";
-#else
- return "%I64d";
-#endif
} else if (dt->is_primitive(PrimitiveTypeID::u64)) {
-#if defined(TI_PLATFORM_UNIX)
return "%llu";
-#else
- return "%I64u";
-#endif
} else if (dt->is_primitive(PrimitiveTypeID::f32)) {
return "%f";
} else if (dt->is_primitive(PrimitiveTypeID::f64)) {
diff --git a/taichi/llvm/llvm_context.cpp b/taichi/llvm/llvm_context.cpp
index 19c46efc9..9597a2523 100644
--- a/taichi/llvm/llvm_context.cpp
+++ b/taichi/llvm/llvm_context.cpp
@@ -230,13 +230,6 @@ void compile_runtime_bitcode(Arch arch) {
}
}
-void compile_runtimes() {
- compile_runtime_bitcode(host_arch());
-#if defined(TI_WITH_CUDA)
- compile_runtime_bitcode(Arch::cuda);
-#endif
-}
-
std::string libdevice_path() {
std::string folder;
if (is_release()) {
diff --git a/taichi/program/kernel.cpp b/taichi/program/kernel.cpp
index a5e3ab019..cf884cbc4 100644
--- a/taichi/program/kernel.cpp
+++ b/taichi/program/kernel.cpp
@@ -276,25 +276,25 @@ Context &Kernel::LaunchContextBuilder::get_context() {
float64 Kernel::get_ret_float(int i) {
auto dt = rets[i].dt->get_compute_type();
if (dt->is_primitive(PrimitiveTypeID::f32)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::f64)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i32)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i64)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i8)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i16)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u8)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u16)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u32)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u64)) {
- return (float64)get_current_program().fetch_result(i);
+ return (float64)program->fetch_result(i);
} else {
TI_NOT_IMPLEMENTED
}
@@ -303,25 +303,25 @@ float64 Kernel::get_ret_float(int i) {
int64 Kernel::get_ret_int(int i) {
auto dt = rets[i].dt->get_compute_type();
if (dt->is_primitive(PrimitiveTypeID::i32)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i64)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i8)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::i16)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u8)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u16)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u32)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::u64)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::f32)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else if (dt->is_primitive(PrimitiveTypeID::f64)) {
- return (int64)get_current_program().fetch_result(i);
+ return (int64)program->fetch_result(i);
} else {
TI_NOT_IMPLEMENTED
}
diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp
index 456c74bb3..b8c88894e 100644
--- a/taichi/program/program.cpp
+++ b/taichi/program/program.cpp
@@ -586,6 +586,10 @@ void Program::async_flush() {
async_engine->flush();
}
+int Program::get_snode_tree_size() {
+ return snode_trees_.size();
+}
+
std::string capitalize_first(std::string s) {
s[0] = std::toupper(s[0]);
return s;
diff --git a/taichi/program/program.h b/taichi/program/program.h
index c9c392a48..1eb7ae9ae 100644
--- a/taichi/program/program.h
+++ b/taichi/program/program.h
@@ -169,6 +169,8 @@ class Program {
*/
void materialize_runtime();
+ int get_snode_tree_size();
+
void visualize_layout(const std::string &fn);
struct KernelProxy {
diff --git a/taichi/program/snode_expr_utils.cpp b/taichi/program/snode_expr_utils.cpp
index b83e7e7c0..02e7ee3c3 100644
--- a/taichi/program/snode_expr_utils.cpp
+++ b/taichi/program/snode_expr_utils.cpp
@@ -67,7 +67,11 @@ void place_child(Expr *expr_arg,
}
auto &child = parent->insert_children(SNodeType::place);
glb_var_expr->set_snode(&child);
- child.name = glb_var_expr->ident.raw_name();
+ if (glb_var_expr->name == "") {
+ child.name = glb_var_expr->ident.raw_name();
+ } else {
+ child.name = glb_var_expr->name;
+ }
if (glb_var_expr->has_ambient) {
glb_var_expr->snode->has_ambient = true;
glb_var_expr->snode->ambient_val = glb_var_expr->ambient_value;
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index 918cb24e6..ed469c619 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -55,7 +55,6 @@ void expr_assign(const Expr &lhs_, const Expr &rhs, std::string tb) {
std::vector> scope_stack;
-void compile_runtimes();
std::string libdevice_path();
std::string get_runtime_dir();
@@ -231,7 +230,10 @@ void export_lang(py::module &m) {
.def("synchronize", &Program::synchronize)
.def("async_flush", &Program::async_flush)
.def("materialize_runtime", &Program::materialize_runtime)
- .def("make_aot_module_builder", &Program::make_aot_module_builder);
+ .def("make_aot_module_builder", &Program::make_aot_module_builder)
+ .def("get_snode_tree_size", &Program::get_snode_tree_size)
+ .def("get_snode_root", &Program::get_snode_root,
+ py::return_value_policy::reference);
py::class_(m, "AotModuleBuilder")
.def("add", &AotModuleBuilder::add)
@@ -276,6 +278,7 @@ void export_lang(py::module &m) {
get_current_program().get_snode_to_glb_var_exprs());
})
.def("data_type", [](SNode *snode) { return snode->dt; })
+ .def("name", [](SNode *snode) { return snode->name; })
.def("get_num_ch",
[](SNode *snode) -> int { return (int)snode->ch.size(); })
.def(
@@ -358,13 +361,25 @@ void export_lang(py::module &m) {
[](Expr *expr) { return expr->is(); })
.def("is_external_var",
[](Expr *expr) { return expr->is(); })
+ .def("is_primal",
+ [](Expr *expr) {
+ return expr->cast()->is_primal;
+ })
.def("set_tb", &Expr::set_tb)
+ .def("set_name",
+ [&](Expr *expr, std::string na) {
+ expr->cast()->name = na;
+ })
.def("set_is_primal",
[&](Expr *expr, bool v) {
expr->cast()->is_primal = v;
})
.def("set_grad", &Expr::set_grad)
.def("set_attribute", &Expr::set_attribute)
+ .def("get_expr_name",
+ [](Expr *expr) {
+ return expr->cast()->name;
+ })
.def("get_attribute", &Expr::get_attribute)
.def("get_raw_address", [](Expr *expr) { return (uint64)expr; })
.def("get_underlying_ptr_address", [](Expr *e) {
@@ -744,7 +759,6 @@ void export_lang(py::module &m) {
m.def("test_throw", [] { throw IRModified(); });
m.def("needs_grad", needs_grad);
- m.def("compile_runtimes", compile_runtimes);
m.def("libdevice_path", libdevice_path);
m.def("host_arch", host_arch);
diff --git a/taichi/runtime/llvm/runtime.cpp b/taichi/runtime/llvm/runtime.cpp
index 43cb5a1ef..2888e7f19 100644
--- a/taichi/runtime/llvm/runtime.cpp
+++ b/taichi/runtime/llvm/runtime.cpp
@@ -1083,19 +1083,19 @@ i32 op_xor_i32(i32 a, i32 b) {
return a ^ b;
}
-#define DEFINE_REDUCTION(op, dtype) \
- dtype warp_reduce_##op##_##dtype(dtype val) { \
- for (int offset = 16; offset > 0; offset /= 2) \
- val = op_##op##_##dtype( \
- val, cuda_shfl_down_sync_i32(0xFFFFFFFF, val, offset, 31)); \
- return val; \
- } \
- dtype reduce_##op##_##dtype(dtype *result, dtype val) { \
- dtype warp_result = warp_reduce_##op##_##dtype(val); \
- if ((thread_idx() & (warp_size() - 1)) == 0) { \
- atomic_##op##_##dtype(result, warp_result); \
- } \
- return val; \
+#define DEFINE_REDUCTION(op, dtype) \
+ dtype warp_reduce_##op##_##dtype(dtype val) { \
+ for (int offset = 16; offset > 0; offset /= 2) \
+ val = op_##op##_##dtype( \
+ val, cuda_shfl_down_sync_##dtype(0xFFFFFFFF, val, offset, 31)); \
+ return val; \
+ } \
+ dtype reduce_##op##_##dtype(dtype *result, dtype val) { \
+ dtype warp_result = warp_reduce_##op##_##dtype(val); \
+ if ((thread_idx() & (warp_size() - 1)) == 0) { \
+ atomic_##op##_##dtype(result, warp_result); \
+ } \
+ return val; \
}
DEFINE_REDUCTION(add, i32);
@@ -1751,6 +1751,14 @@ i32 wasm_materialize(Context *context) {
(Ptr)((size_t)context->runtime->rand_states + sizeof(RandState));
return (i32)(size_t)context->runtime->roots[0];
}
+
+void wasm_set_kernel_parameter_i32(Context *context, int index, i32 value) {
+ *(i32 *)(&context->args[index]) = value;
+}
+
+void wasm_set_kernel_parameter_f32(Context *context, int index, f32 value) {
+ *(f32 *)(&context->args[index]) = value;
+}
}
#endif
diff --git a/tests/conftest.py b/tests/python/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to tests/python/conftest.py
diff --git a/tests/python/test_field.py b/tests/python/test_field.py
index 363e71f5d..53dd5af13 100644
--- a/tests/python/test_field.py
+++ b/tests/python/test_field.py
@@ -92,3 +92,18 @@ def test_default_ip(dtype):
x = ti.Vector.field(2, int, ())
assert x.dtype == ti.get_runtime().default_ip
+
+
+@ti.test()
+def test_field_name():
+ a = ti.field(dtype=ti.f32, shape=(2, 3), name='a')
+ b = ti.Vector.field(3, dtype=ti.f32, shape=(2, 3), name='b')
+ c = ti.Matrix.field(3, 3, dtype=ti.f32, shape=(5, 4), name='c')
+ assert a.name == 'a'
+ assert b.name == 'b'
+ assert c.name == 'c'
+ assert b.snode.name == 'b'
+ d = []
+ for i in range(10):
+ d.append(ti.field(dtype=ti.f32, shape=(2, 3), name=f'd{i}'))
+ assert d[i].name == f'd{i}'
diff --git a/tests/python/test_fields_builder.py b/tests/python/test_fields_builder.py
index f315b4692..a4edd92a7 100644
--- a/tests/python/test_fields_builder.py
+++ b/tests/python/test_fields_builder.py
@@ -13,18 +13,31 @@ def test_fields_with_shape():
def func():
for i in range(n):
x[i] = i
- for i in range(n):
- assert x[i] == i
+ func()
+
+ for i in range(n):
+ assert x[i] == i
+
+ y = ti.field(ti.f32, [n])
+
+ @ti.kernel
+ def func2():
for i in range(n):
- x[i] = i * 2
+ y[i] = i * 2
for i in range(n):
- assert x[i] == i * 2
+ x[i] = i * 3
+
+ func2()
+
+ for i in range(n):
+ assert x[i] == i * 3
+ assert y[i] == i * 2
func()
- with pytest.raises(InvalidOperationError, match='FieldsBuilder finalized'):
- y = ti.field(ti.f32, [n])
+ for i in range(n):
+ assert x[i] == i
@ti.test(arch=[ti.cpu, ti.cuda])
@@ -72,7 +85,7 @@ def func2():
assert x[i] == i * 3
-@ti.test(arch=[ti.cpu, ti.cuda])
+@ti.test(arch=[ti.cpu, ti.cuda], use_unified_memory=True)
def test_fields_builder_pointer():
n = 5
diff --git a/tests/python/test_loop_unique.py b/tests/python/test_loop_unique.py
index c4a33d15d..7ee0cce48 100644
--- a/tests/python/test_loop_unique.py
+++ b/tests/python/test_loop_unique.py
@@ -1,7 +1,7 @@
import taichi as ti
-@ti.test(require=ti.extension.sparse)
+@ti.test(require=ti.extension.sparse, use_unified_memory=True)
def test_loop_unique_simple_1d():
x, y = ti.field(ti.i32), ti.field(ti.i32)
@@ -26,7 +26,7 @@ def inc_y():
assert y[i] == expected_result.get(i, 0)
-@ti.test(require=ti.extension.sparse)
+@ti.test(require=ti.extension.sparse, use_unified_memory=True)
def test_loop_unique_binary_op_1d():
x, y = ti.field(ti.i32), ti.field(ti.i32)
@@ -51,7 +51,7 @@ def inc_y():
assert y[i] == expected_result.get(i, 0)
-@ti.test(require=ti.extension.sparse)
+@ti.test(require=ti.extension.sparse, use_unified_memory=True)
def test_loop_unique_nested_1d():
x, y = ti.field(ti.i32), ti.field(ti.i32)
@@ -77,7 +77,7 @@ def inc_y():
assert y[i] == expected_result.get(i, 0)
-@ti.test(require=ti.extension.sparse)
+@ti.test(require=ti.extension.sparse, use_unified_memory=True)
def test_loop_unique_2d():
x, y, z = ti.field(ti.i32), ti.field(ti.i32), ti.field(ti.i32)
diff --git a/tests/python/test_mpm_particle_list.py b/tests/python/test_mpm_particle_list.py
index 61e3febb0..de94bc4ed 100644
--- a/tests/python/test_mpm_particle_list.py
+++ b/tests/python/test_mpm_particle_list.py
@@ -33,7 +33,7 @@ def __init__(self, res):
def build_pid(self):
ti.block_dim(256)
for p in self.x:
- base = ti.floor(self.x[p] * self.inv_dx - 0.5).cast(int)
+ base = ti.floor(self.x[p] * self.inv_dx - 0.5).cast(int) + 1
ti.append(self.pid.parent(), base, p)
def step(self):
diff --git a/tests/python/test_no_grad.py b/tests/python/test_no_grad.py
index ef6445130..4aba4558a 100644
--- a/tests/python/test_no_grad.py
+++ b/tests/python/test_no_grad.py
@@ -1,3 +1,6 @@
+import numpy as np
+import pytest
+
import taichi as ti
@@ -19,3 +22,23 @@ def func():
with ti.Tape(loss):
func()
+
+
+@ti.all_archs
+def test_raise_no_gradient():
+ y = ti.field(shape=(), name='y', dtype=ti.f64, needs_grad=True)
+ x = ti.field(shape=(), name='x', dtype=ti.f32)
+ z = np.array([1.0])
+
+ @ti.kernel
+ def func(x: ti.template()):
+ y[None] = x.grad[None] * x.grad[None]
+ z[0] = x.grad[None]
+
+ x[None] = 5.
+ with pytest.raises(RuntimeError) as e:
+ func(x)
+
+ assert e.type is RuntimeError
+ assert e.value.args[
+ 0] == f"Gradient x.grad has not been placed, check whether `needs_grad=True`"
diff --git a/tests/python/test_reduction.py b/tests/python/test_reduction.py
index f390cbe6e..0b859665b 100644
--- a/tests/python/test_reduction.py
+++ b/tests/python/test_reduction.py
@@ -39,10 +39,18 @@ def _test_reduction_single(dtype, criterion, op):
a = ti.field(dtype, shape=N)
tot = ti.field(dtype, shape=())
- @ti.kernel
- def fill():
- for i in a:
- a[i] = i
+ if dtype in [ti.f32, ti.f64]:
+
+ @ti.kernel
+ def fill():
+ for i in a:
+ a[i] = i + 0.5
+ else:
+
+ @ti.kernel
+ def fill():
+ for i in a:
+ a[i] = i
ti_op = ti_ops[op]
@@ -62,7 +70,8 @@ def reduce_tmp() -> dtype:
reduce()
tot2 = reduce_tmp()
- ground_truth = np_ops[op](a.to_numpy())
+ np_arr = np.append(a.to_numpy(), [0])
+ ground_truth = np_ops[op](np_arr)
assert criterion(tot[None], ground_truth)
assert criterion(tot2, ground_truth)
diff --git a/tests/python/test_sparse_basics.py b/tests/python/test_sparse_basics.py
index 2cece4351..c6b27d73b 100644
--- a/tests/python/test_sparse_basics.py
+++ b/tests/python/test_sparse_basics.py
@@ -1,3 +1,5 @@
+import pytest
+
import taichi as ti
@@ -75,3 +77,19 @@ def func():
assert s[None] == 5 * n
print(x[257 + n * n * 7])
assert s[None] == 5 * n
+
+
+@pytest.mark.skip(reason='https://github.com/taichi-dev/taichi/issues/2520')
+@ti.test(require=ti.extension.sparse, use_unified_memory=False)
+def test_pointer_direct_place():
+ x, y = ti.field(ti.i32), ti.field(ti.i32)
+
+ N = 1
+ ti.root.pointer(ti.i, N).place(x)
+ ti.root.pointer(ti.i, N).place(y)
+
+ @ti.kernel
+ def foo():
+ pass
+
+ foo()
diff --git a/tests/python/test_tensor_reflection.py b/tests/python/test_tensor_reflection.py
index 5bac5cdbb..cdae5af42 100644
--- a/tests/python/test_tensor_reflection.py
+++ b/tests/python/test_tensor_reflection.py
@@ -59,7 +59,7 @@ def test_unordered():
assert val.snode in blk3.get_children()
assert blk3 in blk2.get_children()
assert blk2 in blk1.get_children()
- assert blk1 in ti.root.get_children()
+ assert blk1 in ti.FieldsBuilder.finalized_roots()[0].get_children()
expected_str = f'ti.root => dense {[n]} => dense {[n, m]}' \
f' => dense {[n, m, p]} => place {[n, m, p]}'