import taichi as ti
# ti.init() # default start Taichi on CPU
# ti.init(arch = ti.cpu) # start Taichi on local CPU architecture
# ti.init(arch = ti.gpu) # start Taichi on GPU architecture
ti.init(arch = ti.cuda) # start Taichi on CUDA architecture
Python 作用域(Python-scope):参考传统 Python 单文件;
Taichi 作用域(Taichi-scope):夹杂在 Python 文件中使用 @ti.kernel
,@ti.func
等修饰符修饰的作用域,将会被送到对应框架(arch = ?
)上实施并行化运算,可类比 C++ 化的 shader。
Taichi example 文件 mandelbrot_zoom.py
中的作用域示例:
# Begin of Python-scope
import taichi as ti
from taichi.math import cmul, dot, log2, vec2, vec3
ti.init(arch=ti.gpu)
MAXITERS = 100
width, height = 800, 640
pixels = ti.Vector.field(3, ti.f32, shape=(width, height))
# Begin of Taichi-scope
@ti.func
def setcolor(z, i):
    """Map an escaped point z (after i iterations) to an RGB color.

    Uses smooth (fractional) escape-time coloring: the nested log2 terms
    remove the banding that raw integer iteration counts would produce.
    """
    v = log2(i + 1 - log2(log2(z.norm()))) / 5
    col = vec3(0.)
    if v < 1.0:
        # Dark-to-bright ramp for small v.
        col = vec3(v**4, v**2.5, v)
    else:
        # Fold v back toward 0 so the palette stays bounded.
        v = ti.max(0., 2 - v)
        col = vec3(v, v**1.5, v**3)
    return col
@ti.kernel
def render(time: ti.f32):
    """Render one frame of the Mandelbrot zoom at animation time `time`."""
    # Zoom factor oscillates with time; pow(..., 8) sharpens the zoom curve.
    zoo = 0.64 + 0.36 * ti.cos(0.02 * time)
    zoo = ti.pow(zoo, 8.0)
    # Rotation angle scales with (1 - zoo): rotation slows when zoomed in.
    ca = ti.cos(0.15 * (1.0 - zoo) * time)
    sa = ti.sin(0.15 * (1.0 - zoo) * time)
    # Outermost struct-for over the pixel field: parallelized by Taichi.
    for i, j in pixels:
        # Map pixel coordinates to [-1, 1]-ish view space.
        c = 2.0 * vec2(i, j) / height - vec2(1)
        #c *= 1.16
        # Rotate, then translate/scale to the zoom center.
        xy = vec2(c.x * ca - c.y * sa, c.x * sa + c.y * ca)
        c = vec2(-0.745, 0.186) + xy * zoo
        z = vec2(0.)
        count = 0.
        # Escape-time iteration: z <- z^2 + c until divergence or MAXITERS.
        while count < MAXITERS and dot(z, z) < 50:
            z = cmul(z, z) + c
            count += 1.
        if count == MAXITERS:
            # Point assumed inside the set: paint it black.
            pixels[i, j] = [0, 0, 0]
        else:
            pixels[i, j] = setcolor(z, count)
# End of Taichi-scope
def main():
    """Open a GUI window and animate the Mandelbrot zoom frame by frame."""
    window = ti.GUI("Mandelbrot set zoom", res=(width, height))
    frame = 0
    while frame < 100000:
        render(frame * 0.03)
        window.set_image(pixels)
        window.show()
        frame += 1
if __name__ == '__main__':
main()
# End of Python-scope
# ti.init(arch = ti.cpu) # start Taichi on local CPU architecture
ti.init(arch = ti.cuda) # start Taichi on CUDA architecture
ti.i8
,ti.i16
,ti.i32
(默认),ti.i64
;ti.u8
,ti.u16
,ti.u32
,ti.u64
;ti.f32
(默认),ti.f64
。ti.init(arch = ti.metal) # start Taichi on Metal architecture
Metal 框架下不支持 64 位数据类型,如
ti.i64
,ti.u64
,ti.f64
。
ti.init(arch = ti.opengl) # start Taichi on OpenGL architecture
OpenGL 框架下不支持短整型与无符号整型,如
ti.i8
,ti.i16
,ti.u8
,ti.u16
,ti.u32
,ti.u64
。
在 Taichi 初始化函数可传参重载默认数据类型:
# reset the default signed integer type to ti.i64, the default float type to ti.f64
ti.init(arch = ti.cuda, default_ip = ti.i64, default_fp = ti.f64)
注意:
arch
缺省时默认采用本地 CPU(如 x64)架构;需要其他后端时必须显式指定,不可缺省。
如:
i32 + f32 -> f32
,i32 + i64 -> i64
。
import taichi as ti
ti.init(arch = ti.cuda)
def funcPythonScope():
    """Python scope is dynamically typed: rebinding `a` to a float wins."""
    a = 1
    a = 2.33
    print(a)
@ti.kernel
def funcTaichiScope():
    # In Taichi scope a variable's type is fixed at first assignment:
    # `a` becomes an integer, so assigning 2.33 truncates it and prints 2.
    a = 1; a = 2.33
    print(a)
funcPythonScope() # output: 2.33
funcTaichiScope() # output: 2
newVar = ti.cast(var, type)
可进行数据类型的显式转换。import taichi as ti
ti.init(arch = ti.cuda)
@ti.kernel
def funcTaichiScope():
    """Explicitly cast the float `a` to ti.i32; 2.33 truncates to 2."""
    # FIX: the original snippet omitted the @ti.kernel decorator, yet the
    # function name, the use of ti.cast, and the Taichi-formatted output
    # comment ("2.330000 2") all show it is meant to run in Taichi scope.
    a = 2.33; b = ti.cast(a, ti.i32)
    print(a, b)
funcTaichiScope() # output: 2.330000 2
Taichi 内部预定义了一些复合数据类型的关键字,如 ti.Vector
,ti.Matrix
,ti.Struct
;当然用户也可以自定义复合数据类型,此处类比 C++ 中的 typedef struct
。
import taichi as ti
ti.init(arch = ti.cuda)
vec3f = ti.types.vector(3, ti.f32)
mat2f = ti.types.matrix(2, 2, ti.f32)
ray = ti.types.struct(ro = vec3f, rd = vec3f, l = ti.f32)
@ti.kernel
def funcTaichiScope_01():
    # Built-in compound types constructed directly in Taichi scope.
    a = ti.Vector([0.0, 0.0, 0.0]); print(a)
    d = ti.Vector([0.0, 1.0, 0.0]); print(d)
    # Matrix elements are accessed with multiple indices, e.g. B[1, 0].
    B = ti.Matrix([[2.3, -1], [1.2, 1.4]]); print("B =", B, "B[1, 0] =", B[1, 0])
    r = ti.Struct(v1 = a, v2 = d, l = 1); print("r.v1 =", r.v1)
@ti.kernel
def funcTaichiScope_02():
    # User-defined compound types (ti.types.vector/matrix/struct) act as
    # constructors inside Taichi scope.
    a = vec3f(1.0); print(a)  # a single scalar broadcasts to all components
    d = vec3f(0.0, 1.0, 0.0); print(d)
    B = mat2f([[2.3, -1], [1.2, 1.4]]); print("B =", B)
    r = ray(ro = a, rd = d, l = 1); print("r.ro =", r.ro)
funcTaichiScope_01()
'''
[0.000000, 0.000000, 0.000000]
[0.000000, 1.000000, 0.000000]
B = [[2.300000, -1], [1.200000, 1.400000]] B[1, 0] = 1.200000
r.v1 = [0.000000, 0.000000, 0.000000]
'''
funcTaichiScope_02()
'''
output:
[1.000000, 1.000000, 1.000000]
[0.000000, 1.000000, 0.000000]
B = [[2.300000, -1.000000], [1.200000, 1.400000]]
r.ro = [1.000000, 1.000000, 1.000000]
'''
注意 Taichi 中的矩阵可通过多 index 随机访问,如上述代码第 12 行中的
B = ti.Matrix([[2.3, -1], [1.2, 1.4]]); print(B[1, 0])
。
可通过关键字标识符 ti.field(dtype: Any, shape: Any | None = None, ...)
来声明场数据类型。
## For Examples
# heat map of a 256*256 grid
heat_field = ti.field(dtype = ti.f32, shape = (256, 256))
# 3D gravitational field in a 256*256*128 room
gravitational_field = ti.Vector.field(n = 3, dtype = ti.f32, shape = (256, 256, 128))
# 2D strain-tensor field in a 64*64 grid
strain_tensor_field = ti.Matrix.field(n = 2, m = 2, dtype = ti.f32, shape = (64, 64))
# a global scalar that I want to access in a Taichi kernel
global_scalar = ti.field(dtype = ti.f32, shape=())
B[1, 0]
,此处可有 heat_field[30, 48]
。import taichi as ti
ti.init(arch = ti.cuda)
pixels = ti.field(dtype = float, shape = (4, 8)) # field: matrix (4*8); elements: float
vf = ti.Vector.field(3, ti.f32, shape = 4) # field: vector (4*1); elements: vector (3*1)
zero_d_scalar = ti.field(ti.f32, shape = ()) # field: 0-D scalar; element: ti.f32
zero_d_vec = ti.Vector.field(2, ti.f32, shape = ()) # field: 0-D; element: vector (2*1)
def funcPythonScope():
    """Access Taichi fields from Python scope; 0-D fields are indexed with None."""
    pixels[1, 2]
    print(pixels)
    zero_d_scalar[None] = 2.33
    print(zero_d_scalar[None])
    zero_d_vec[None] = ti.Vector([1.1, 1.4])
    print(zero_d_vec[None])
@ti.kernel
def funcTaichiScope():
    # Write one element of the vector field; the rest stay zero-initialized.
    v = ti.Vector([1, 2, 3])
    vf[0] = v; print(vf[0], vf[1], vf[2], vf[3])
    # 0-D (shape = ()) fields are always addressed with the None index.
    zero_d_scalar[None] = 1.5; print(zero_d_scalar[None])
    zero_d_vec[None] = ti.Vector([1.5, 4.4]); print(zero_d_vec[None])
if __name__ == "__main__":
funcPythonScope()
funcTaichiScope()
'''
[[0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0.]]
2.3299999237060547
[1.10000002 1.39999998]
[1.000000, 2.000000, 3.000000] [0.000000, 0.000000, 0.000000] [0.000000, 0.000000, 0.000000] [0.000000, 0.000000, 0.000000]
1.500000
[1.500000, 4.400000]
'''
注意,标量场(
shape = ()
)的随机访问须使用下标None
访问,如zero_d_vec[None]
。
被 @ti.kernel
在头部修饰的 Python 函数段为一个 Taichi 计算核(核函数,kernel )。单文件中可定义多个核函数,可从外界 Python 作用域调用 Taichi 核函数,但核函数间不可互相调用,不可递归,否则会线程冲突产生异常。
import taichi as ti
ti.init(arch = ti.cuda)
def funcPython():
    """Python-scope code may launch any Taichi kernel."""
    message = "Python function legally calls Taichi kernel 02."
    print(message)
    Kernel_02() # legal
@ti.kernel
def Kernel_01():
    # Kernels must not call other kernels; the commented call would error.
    print("Taichi kernel 01 illegally calls Taichi kernel 02.")
    # Kernel_02() # illegal
@ti.kernel
def Kernel_02():
    # A plain kernel, launched from Python scope.
    print("Taichi kernel 02 is called.")
funcPython()
# Kernel_01()
Kernel_02() # legal
若程序在并行化环境内运行(例如 arch = ti.cuda
),Taichi 计算核内的最外层 for
循环们会被自动分配到不同线程做并行化处理,它们中的次外层以及更内层循环在每个线程中依然是串行处理的。
import taichi as ti
ti.init(arch = ti.cuda)
@ti.kernel
def parallelTest():
    # parallelized: outermost for-loop of the kernel, one thread per i
    for i in range(5):
        # serialized in each parallel thread
        for j in range(3):
            print(i, j, sep=',', end='\t')
        print("\nthread", i, "done", end='')
    print('')
    # parallelized: also an outermost loop of the same kernel
    for k in range(10):
        print("k =", k, end='\t')
    print('')
parallelTest()
'''
0,0 1,0 2,0 3,0 4,0 0,1 1,1 2,1 3,1 4,1 0,2 1,2 2,2 3,2 4,2
thread 0 done
thread 1 done
thread 2 done
thread 3 done
thread 4 done
k = 0 k = 1 k = 2 k = 3 k = 4 k = 5 k = 6 k = 7 k = 8 k = 9
'''
不难想到,
for
循环外如果被嵌套了逻辑跳转层,那么他就不会被认作为最外层,也不会被并行化处理(逻辑跳转会打破并行化的正常线程分配)。
@ti.kernel
def parallelTest_01(k: ti.i32):
    # run in parallel: the for-loop is outermost; the branch is inside it
    for i in range(1000):
        if k > 108:
            ...
@ti.kernel
def parallelTest_02(k: ti.i32):
    if k > 108:
        # run in serial: the loop is nested under a branch, so it is no
        # longer the kernel's outermost loop and is not parallelized
        for i in range(1000):
            ...
为了最大化并行化效率,可将多层循环调整顺序或拆解,优先并行化处理体量大的循环。
def myForLoop():
    """Keep the large (1000-trip) loop inside the kernel so that it, not the
    small 100-trip loop, is the one Taichi parallelizes."""
    # The commented-out inner loop shows what was moved into myTaichiFor:
    # for j in range(1000):
    #     ...
    for _ in range(100):
        myTaichiFor()
@ti.kernel
def myTaichiFor():
    # The 1000-trip loop is this kernel's outermost loop, so it is parallelized.
    for j in range(1000):
        ...
myForLoop()
若该层代码会被并行化执行,其中不可出现例如
break
的逻辑中断语句,否则会执行中断异常(并行化后各线程间的不再考虑逻辑先后顺序)。
@ti.kernel
def myTaichiFor_01():
    # run in parallel
    for i in range(1000):
        ...
        # break # ILLEGAL: cannot break out of a parallelized loop
@ti.kernel
def myTaichiFor_02():
    # run in parallel
    for i in range(100):
        # run in serial: inner loops stay serial per thread, so break is fine
        for j in range(50):
            ...
            break # legal
在并行化处理的外层 for
循环中,如涉及四则运算,尽量使用自加减等原子操作。若采取普通运算赋值则没有原子锁读写保护,会产生结果异常。
import taichi as ti
ti.init(arch = ti.cuda)
x = ti.field(dtype = ti.f32, shape = 100)
total1 = ti.field(ti.f32, shape = ()) # init as 0
total2 = ti.field(ti.f32, shape = ()) # init as 0
total3 = ti.field(ti.f32, shape = ()) # init as 0
def init():
    """Fill x with 0..99. (total1/2/3 start at 0 as zero-initialized fields.)"""
    # total1[None] = 0; total2[None] = 0; total3[None] = 0
    for idx in range(100):
        x[idx] = idx
@ti.kernel
def funcSum():
    # The outermost loop is parallelized, so all threads update the three
    # accumulators concurrently; only atomic updates are safe.
    for i in range(100):
        # no data race: += on a field element is an atomic add
        total1[None] += x[i]
        # no data race: explicit atomic add
        ti.atomic_add(total2[None], x[i])
        # DATA RACE! plain read-modify-write loses concurrent updates
        total3[None] = total3[None] + x[i]
init()
funcSum()
print(total1[None], total2[None], total3[None]) # output: 4950.0 4950.0 32.0
for
循环在核函数内可以编写为多 index 遍历场数据类型的形式,且需作为最外层循环出现,实现并行化处理。import taichi as ti
ti.init(arch = ti.cuda)
N = 10
x = ti.field(dtype = ti.i32, shape=N)
y = ti.Vector.field(2, dtype = ti.i32, shape = (N, N))
@ti.kernel
def solve():
    # range-for: iterates an integer range; outermost, hence parallelized
    for i in range(N):
        x[i] = i
    # struct-for: iterates every index of field y; must be an outermost
    # loop of the kernel, and is likewise parallelized
    for i, j in y:
        y[i, j] = ti.Vector([i, j])
solve()
# print(x, y)
可以使用 ti.sync()
方法来阻塞新线程的调用,直至已申请线程全部运行完毕并释放,以达到同步效果。
import taichi as ti
ti.init(arch = ti.gpu)
@ti.kernel
def syncTest():
    # NOTE(review): "outter" is a typo for "outer"; kept so the code is unchanged.
    # Outermost loop: 3 parallel threads, each printing i = 0..4, which is why
    # the output interleaves as "0 0 0 1 1 1 ...".
    for outter in range(3):
        for i in range(5):
            print(i, end=' ')
    # Printed once per kernel launch, after the parallel loop finishes.
    print("inside kernel")
print("before kernel")
syncTest(); syncTest(); syncTest()
print("after kernel")
ti.sync() # Blocks the calling thread until all the previously launched Taichi kernels have completed.
print("after sync")
syncTest()
'''
before kernel
after kernel
0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 inside kernel
0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 inside kernel
0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 inside kernel
after sync
0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 inside kernel
'''
最多支持从 Python 作用域到 Taichi 作用域的传参数量为 8。
import taichi as ti
ti.init(arch = ti.cuda)
@ti.kernel
def typeHintedTest(x: ti.i32, y: ti.f32):
    # Kernel arguments require type hints; i32 + f32 promotes to f32 (5.300000).
    print(x + y)
@ti.kernel
def vectorParameterTest(vx: ti.f32, vy: ti.f32):
    # The caller passes the vector's components as scalars and the vector
    # is rebuilt inside the kernel.
    v = ti.Vector([vx, vy])
    print(v)
@ti.kernel
def passByValueTest(x: ti.i32):
    '''
    new change:
    'Kernel argument "x" is immutable in the kernel.
    If you want to change its value, please create a new variable.'
    '''
    # Arguments are passed by value: mutating x would never affect the caller,
    # and (per the quoted error) newer Taichi versions forbid mutating it at all.
    # x += 1
    print("x in kernel =", x)
typeHintedTest(2, 3.3) # output: 5.300000
v = ti.Vector([11.4, 15.4]); vectorParameterTest(v[0], v[1]) # output: [11.400000, 15.400000]
x = 100; passByValueTest(x); print("x outside =", x) # output: x outside = 100
# x in kernel = 100
Taichi 计算核仅可返回一标量值,并指定数据类型,也可选择不返回。
import taichi as ti
ti.init(arch = ti.cuda)
@ti.kernel
def returnTest() -> ti.i32: # return type hinted: ti.i32
    # The float literal is converted to the hinted i32: the caller receives 233.
    return 233.666
print(type(returnTest())) # output:
print(returnTest()) # output: 233
被 @ti.func
在头部修饰的 Python 函数段为一个 Taichi 函数( Taichi function )。单文件中可定义多个 Taichi 函数,用于 Taichi 作用域中复用代码。不可从外界 Python 作用域调用 Taichi 函数。
import taichi as ti
ti.init(arch = ti.cuda)
@ti.func
def funcTaichi():
    # Taichi functions are callable only from Taichi scope (kernels or
    # other Taichi functions), never from Python scope.
    print("Taichi function is called.")
@ti.kernel
def Kernel():
    # A kernel may call a Taichi function.
    print("Taichi kernel legally calls Taichi function.")
    funcTaichi() # legal
# Python function legally calls Taichi function.
# funcTaichi() # illegal, error: Taichi functions cannot be called from Python-scope.
Kernel()
import taichi as ti
ti.init(arch = ti.cuda)
@ti.func
def funcTaichi_01(vec):
    # A Taichi function may take untyped parameters and return a vector.
    v = ti.Vector([vec[0], vec[1]+1])
    return v
@ti.func
def funcTaichi_02(vec):
    # A Taichi function may also return multiple values.
    return vec[0], vec[1]
@ti.kernel
def Kernel():
    vec = ti.Vector([2, 3.3])
    # Vector return value from a Taichi function...
    ret = funcTaichi_01(vec); print(ret) # output: [2, 4.300000]
    # ...and multiple scalar returns unpacked at the call site.
    x, y = funcTaichi_02(vec); print(x, y) # output: 2 3.300000
Kernel()
__global__
函数,且最外层会被并行化处理;__device__
函数,且会被强制内联。所有内容在 Taichi 作用域中可被视作静态的:
ti.Vector
与 ti.f32
之间不可相互赋值,可类比 C++),否则会报错;此处的静态数据类型并不是指变量值为常量不可修改,而是指数据类型。
由于 Taichi 作用域中的数据静态,在程序运行时 Python 作用域中的变量修改对 Taichi 透明,如需共用全局变量,需采用场数据类型。
import taichi as ti
ti.init(arch = ti.cuda)
a = 42
b = ti.field(ti.i32, shape=())
@ti.kernel
def staticTest():
    # The plain Python global `a` is captured when the kernel is first
    # compiled; later Python-side changes are invisible (always prints 42).
    print("a in kernel =", a);
    # Field values are read at run time, so b[None] reflects the current value.
    print("b in kernel =", b[None])
    b[None] = 42
staticTest()
a = 53
b[None] = 53
print("a outside =", a)
print("b outside =", b[None])
staticTest()
'''
a in kernel = 42
b in kernel = 42
a outside = 53
b outside = 53
a in kernel = 42
b in kernel = 53
'''
import taichi as ti
ti.init(arch = ti.gpu)
a = ti.field(ti.f32, shape=())
b = ti.field(ti.f32, shape=())
c = ti.field(ti.f32, shape=())
@ti.kernel
def taichiCopyTest():
    # In Taichi scope assignment copies the vector by value, so mutating
    # vacb leaves vaca untouched: prints [1.000000, 2.000000].
    vaca = ti.Vector([1.0, 2.0]); vacb = vaca
    vacb[0] = 1.14; print("vaca_taichi =", vaca)
def pythonCopyTest():
    # In Python scope assignment binds another reference to the same object,
    # so mutating vacb also changes vaca: prints [1.14 2. ].
    vaca = ti.Vector([1.0, 2.0]); vacb = vaca
    vacb[0] = 1.14; print("vaca_python =", vaca)
# NOTE(review): the two output comments below were swapped in the original;
# taichiCopyTest prints "vaca_taichi = ..." and pythonCopyTest "vaca_python = ...".
taichiCopyTest() # output: vaca_taichi = [1.000000, 2.000000]
pythonCopyTest() # output: vaca_python = [1.14 2. ]
print("a =", a[None]) # output: a = 0.0
b = a # shallow copy: b is just another Python reference to the same field
b[None] = 1.0
print("a =", a[None]) # output: a = 1.0
c.copy_from(a) # deep copy: c gets its own copy of the data
c[None] = 2.0
print("a =", a[None]) # output: a = 1.0 (writing c does not touch a)
如第 23 行,Python 作用域中对场进行深拷贝可使用
copy_from
成员方法。
Taichi 内置了一些数学函数,例如三角函数、矩阵变换求解等,如需详情,可移步至 Taichi B站视频图形课或官方文档。
Taichi 中的矩阵乘法运算符为
@
,*
代表对应位置元素数乘。