Problem with a simple vector-add kernel for some array sizes on CUDA
Created by: jkozdon
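I am not sure what I am doing wrong in the following script, which adds two vectors with a KernelAbstractions kernel on the CPU and on the GPU and compares the result against `A + B`: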
using KernelAbstractions
using CUDA
using Printf
using Random
# Element-wise addition kernel: every work-item writes one entry of `C` as the sum
# of the matching entries of `A` and `B`.
#
# Bounds checks are disabled inside the body, so the launch `ndrange` must not
# exceed the length of any of the three arrays.
@kernel function ka_knl_add!(C, A, B)
    idx = @index(Global)
    @inbounds begin
        C[idx] = A[idx] + B[idx]
    end
end
# Run the vector-add kernel on `device` for N = 2^2, 2^4, ..., 2^28 and print a
# report for every size at which the kernel result differs from `A + B`.
#
# `device` is the KernelAbstractions device to launch on (`CPU()` or
# `CUDADevice()`). Nothing is returned; all output goes to stdout.
function main(device)
    @show device
    # Workgroup size: 4 on the CPU, 1024 on every other device.
    ka_add! = ka_knl_add!(device, device isa CPU ? 4 : 1024)
    for k = 2:2:28
        N = 2^k
        A = rand(N)
        B = rand(N)
        C = similar(A)
        if device isa CUDADevice
            A = CuArray(A)
            B = CuArray(B)
            C = CuArray(C)
            # The uploads above are issued through CUDA.jl, while the kernel below
            # is launched through KernelAbstractions. Block until the uploads have
            # completed so the kernel cannot read `A`/`B` before the data arrived.
            # NOTE(review): presumably the KernelAbstractions launch is not ordered
            # after CUDA.jl's copies on its own; passing an event recorded on the
            # CUDA.jl stream as a launch dependency would be a non-blocking
            # alternative. Confirm against the installed KernelAbstractions version.
            CUDA.synchronize()
        end
        # The launch returns an event; wait on it before touching `C` on the host side.
        wait(ka_add!(C, A, B, ndrange = N))
        if C ≠ A + B
            println()
            println("2^$k = $N")
            @show C == A + B
            @show C ≈ A + B
            @show extrema(Array(C - A - B))
        end
    end
    return nothing
end
# Seed the global RNG so the random inputs are reproducible, then run the check
# on the CPU device first and on the CUDA device second.
Random.seed!(77)
for make_device in (CPU, CUDADevice)
    main(make_device())
end
But for some array sizes on a V100 this produces:
device = CPU()
device = CUDADevice()
2^16 = 65536
C == A + B = false
C ≈ A + B = false
extrema(Array((C - A) - B)) = (-5.447940142435828, 0.13998980843263142)
2^18 = 262144
C == A + B = false
C ≈ A + B = false
extrema(Array((C - A) - B)) = (-1.9034060006220799, 1.8951141473846453)