-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.jl
86 lines (70 loc) · 1.88 KB
/
test.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
using BenchmarkTools
using CuArrays
using Flux
# Call wrapper: both layers are benchmarked and timed through this one entry point.
function f(m, x)
    return m(x)
end

# Sizes shared by every run in this section.
# NOTE(review): the global `in` shadows `Base.in` in this module — consider renaming.
(in, out, batchsize) = (1536, 1024, 64)
σ = Flux.leakyrelu

# The `CachedDense` layer and the `Flux.Dense` layer it is compared against,
# built with the same sizes and activation and moved with `Flux.gpu`.
m = Flux.gpu(CachedDense(in, out, σ; batchsize=batchsize))
mf = Flux.gpu(Flux.Dense(in, out, σ))

# BenchmarkTools runs, input: a single vector of length `in`.
x = Flux.gpu(rand(Float32, in));
@benchmark CuArrays.@sync f($m, $x)
@benchmark CuArrays.@sync f($mf, $x)

# BenchmarkTools runs, input: `in × batchsize÷2` matrix.
x = Flux.gpu(rand(Float32, in, batchsize ÷ 2));
@benchmark CuArrays.@sync f($m, $x)
@benchmark CuArrays.@sync f($mf, $x)

# BenchmarkTools runs, input: `in × batchsize` matrix.
x = Flux.gpu(rand(Float32, in, batchsize));
@benchmark CuArrays.@sync f($m, $x)
@benchmark CuArrays.@sync f($mf, $x)

# The same three input shapes again, this time as single calls under `CuArrays.@time`.
x = Flux.gpu(rand(Float32, in));
CuArrays.@time f(m, x);
CuArrays.@time f(mf, x);

x = Flux.gpu(rand(Float32, in, batchsize ÷ 2));
CuArrays.@time f(m, x);
CuArrays.@time f(mf, x);

x = Flux.gpu(rand(Float32, in, batchsize));
CuArrays.@time f(m, x);
CuArrays.@time f(mf, x);

# Exploratory leftovers: build conv dims from `x` and look up the `conv!` method
# that would be called for these arguments (`@edit` is an interactive helper).
cdims = Flux.NNlib.DenseConvDims(x, x)
@edit Flux.conv!(x, m.W, x, cdims)
###
# Parity check: `CachedDense` against a `Dense` layer built from copies of its
# weight `W` and bias `b`.
x = gpu(rand(Float32, in, batchsize))
m = gpu(CachedDense(in, out; batchsize=batchsize))
θ = Flux.params(m)
mf = gpu(Dense(copy(m.W), copy(m.b)))
θf = Flux.params(mf)

# Forward outputs of both layers, compared for exact equality.
mf(x) == m(x)

# Gradient of the summed output w.r.t. the cached layer's parameters, run under the
# Juno profiler.
# NOTE(review): `gs` is assigned inside the macro call — confirm it is still bound at
# top level afterwards, since it is read a few lines further down.
Juno.@profiler gs = gradient(() -> sum(m(x)), θ)

# The same gradient through the `Dense` layer, timed with `@time`.
@time gsf = gradient(() -> sum(mf(x)), θf)

# Per-parameter gradients, compared for exact equality.
gsf[mf.W] == gs[m.W]
gsf[mf.b] == gs[m.b]
###
"""
    reorder(x::AbstractMatrix)

Return a new matrix made of the rows of `x` split into four equal-height blocks, with
the last two blocks exchanged: the blocks come out in the order 1, 2, 4, 3. Columns are
left untouched.

# Arguments
- `x`: matrix whose number of rows is a multiple of 4.

# Throws
- `ArgumentError`: if `size(x, 1)` is not divisible by 4. The four blocks would then
  have unequal heights and the result would be silently mis-laid-out.
"""
function reorder(x::AbstractMatrix)
    nrows = size(x, 1)
    nrows % 4 == 0 ||
        throw(ArgumentError("number of rows must be a multiple of 4, got $nrows"))
    o = nrows ÷ 4  # height of one block
    # Blocks 1-2 stay in place; block 4 is emitted before block 3.
    return vcat(x[1:(2o), :], x[(3o + 1):end, :], x[(2o + 1):(3o), :])
end
"""
    reorder(x::AbstractVector)

Return a new vector made of the elements of `x` split into four equal-length blocks,
with the last two blocks exchanged: the blocks come out in the order 1, 2, 4, 3.

# Arguments
- `x`: vector whose length is a multiple of 4.

# Throws
- `ArgumentError`: if `length(x)` is not divisible by 4. The four blocks would then
  have unequal lengths and the result would be silently mis-laid-out.
"""
function reorder(x::AbstractVector)
    len = size(x, 1)
    len % 4 == 0 ||
        throw(ArgumentError("length must be a multiple of 4, got $len"))
    o = len ÷ 4  # length of one block
    # Blocks 1-2 stay in place; block 4 is emitted before block 3.
    return vcat(x[1:(2o)], x[(3o + 1):end], x[(2o + 1):(3o)])
end
# Small parity check: the cell of `Flux.LSTM` against a `CachedLSTMCell` built from
# the same parameters after passing them through `reorder`.
(in, out, batchsize) = (3, 5, 7)
x = gpu(rand(Float32, in, batchsize))

# Reference layer; it is called once on `x` before its state is read.
mf = gpu(Flux.LSTM(in, out)); mf(x)
h, c = mf.state
(h1, c1), h1 = mf.cell((h, c), x)

# A first cell is built only so its `cache` can be reused by the second one, which
# takes the reordered reference parameters.
m = gpu(CachedLSTMCell(in, out; batchsize=3 * batchsize))
m = gpu(
    CachedLSTMCell(
        reorder(mf.cell.Wi), reorder(mf.cell.Wh), reorder(mf.cell.b), m.cache
    )
)
(h2, c2), h2 = m((copy(h), copy(c)), x)

# Exact comparison, raw differences, then approximate comparison of both results.
h1 == h2
c1 == c2
h1 - h2
c1 - c2
h1 ≈ h2
c1 ≈ c2