Skip to content

Commit

Permalink
Merge #239
Browse files Browse the repository at this point in the history
239: Add some repeats, and plotting of marginal CDFs for validation  r=odunbar a=odunbar

## Purpose 

Allows for some repeats in `Emulator/L63` example to see if randomization in the algorithm changes the results. Also adds a new plot for all the CDFs from all repeats for comparison

## Plots from examples with `n_repeats = 30`
The new CDF plots over all 30 repeated runs: (Orange = true, Blue = emulated)
![l63_cdfs](https://github.com/CliMA/CalibrateEmulateSample.jl/assets/47412152/353a31ac-2f74-4902-9be7-7e78b280c9a4)
Also diagnostic plots from run 1:
![l63_test](https://github.com/CliMA/CalibrateEmulateSample.jl/assets/47412152/29f9d7e9-f957-4d38-b1b5-947a37c33e12)
![l63_attr](https://github.com/CliMA/CalibrateEmulateSample.jl/assets/47412152/0fdcb1f2-e25d-49c9-b01e-60a11894fb57)
![l63_pdf](https://github.com/CliMA/CalibrateEmulateSample.jl/assets/47412152/741c8c37-5d89-45a2-ba2c-b57d40737bf0)


<!---
Review checklist

I have:
- followed the codebase contribution guide: https://clima.github.io/ClimateMachine.jl/latest/Contributing/
- followed the style guide: https://clima.github.io/ClimateMachine.jl/latest/DevDocs/CodeStyle/
- followed the documentation policy: https://github.com/CliMA/policies/wiki/Documentation-Policy
- checked that this PR does not duplicate an open PR.

In the Content, I have included 
- relevant unit tests, and integration tests, 
- appropriate docstrings on all functions, structs, and modules, and included relevant documentation.

-->

----
- [ ] I have read and checked the items on the review checklist.


Co-authored-by: odunbar <[email protected]>
  • Loading branch information
bors[bot] and odunbar authored Oct 25, 2023
2 parents b16cfd5 + 4daf48b commit 9227bc2
Showing 1 changed file with 203 additions and 150 deletions.
353 changes: 203 additions & 150 deletions examples/Emulator/L63/emulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ function main()
# rng
rng = MersenneTwister(1232434)

n_repeats = 1 # repeat exp with same data.
println("run experiment $n_repeats times")

# Run L63 from 0 -> tmax
u0 = [1.0; 0.0; 0.0]
tmax = 20
Expand All @@ -46,11 +49,31 @@ function main()
prob_hist = ODEProblem(lorenz, u0_hist, tspan_hist)
sol_hist = solve(prob_hist, Euler(), dt = dt)


# test data for plotting
xspan_test = 0.0:dt:tmax_test
solplot = zeros(3, length(xspan_test))
for i in 1:length(xspan_test)
solplot[:, i] = sol_test.u[i] #noiseless
end

# hist data for plotting
xspan_hist = 0.0:dt:tmax_hist
solhist = zeros(3, length(xspan_hist))
for i in 1:length(xspan_hist)
solhist[:, i] = sol_hist.u[i] #noiseless
end

# Create training pairs (with noise) from subsampling [burnin,tmax]
tburn = 10
burnin = Int(floor(10 / dt))
tburn = 1 # NB works better with no spin-up!
burnin = Int(floor(tburn / dt))
n_train_pts = 600 #600
ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts])
sample_rand = true
if sample_rand
ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts])
else
ind = burnin:(n_train_pts + burnin)
end
n_tp = length(ind)
input = zeros(3, n_tp)
output = zeros(3, n_tp)
Expand All @@ -62,176 +85,206 @@ function main()
end
iopairs = PairedDataContainer(input, output)


# Emulate
cases =
["GP", "RF-scalar", "RF-scalar-diagin", "RF-vector-svd-nonsep", "RF-vector-nosvd-nonsep", "RF-vector-nosvd-sep"]

case = cases[1]
case = cases[5]
decorrelate = true
nugget = Float64(1e-12)
overrides = Dict(
"verbose" => true,
"scheduler" => DefaultScheduler(1.0),#DataMisfitController(terminate_at = 1e4),
"cov_sample_multiplier" => 0.5,
"n_features_opt" => 200,
"n_iteration" => 20,
# "n_ensemble" => 20,
# "localization" => EKP.Localizers.SEC(1.0,0.1),
"accelerator" => NesterovAccelerator(),
)

# Build ML tools
if case == "GP"
gppackage = Emulators.GPJL()
pred_type = Emulators.YType()
mlt = GaussianProcess(
gppackage;
kernel = nothing, # use default squared exponential kernel
prediction_type = pred_type,
noise_learn = false,
)

elseif case ["RF-scalar", "RF-scalar-diagin"]
n_features = 10 * Int(floor(sqrt(3 * n_tp)))
kernel_structure =
case == "RF-scalar-diagin" ? SeparableKernel(DiagonalFactor(nugget), OneDimFactor()) :
SeparableKernel(LowRankFactor(3, nugget), OneDimFactor())
mlt = ScalarRandomFeatureInterface(
n_features,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
elseif case ["RF-vector-svd-nonsep"]
kernel_structure = NonseparableKernel(LowRankFactor(6, nugget))
n_features = 500

mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
u_test = []
u_hist = []
train_err = []
for rep_idx in 1:n_repeats

overrides = Dict(
"verbose" => true,
"scheduler" => DataMisfitController(terminate_at = 1e4),
"cov_sample_multiplier" => 0.5,
"n_features_opt" => 200,
"n_iteration" => 20,
"accelerator" => NesterovAccelerator(),
)
elseif case ["RF-vector-nosvd-nonsep"]
kernel_structure = NonseparableKernel(LowRankFactor(3, nugget))
n_features = 500
decorrelate = false # don't do SVD
mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
elseif case ["RF-vector-nosvd-sep"]
kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget))
n_features = 500
decorrelate = false # don't do SVD
mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
end

# Emulate
emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate)
optimize_hyperparameters!(emulator)

# Build ML tools
if case == "GP"
gppackage = Emulators.GPJL()
pred_type = Emulators.YType()
mlt = GaussianProcess(
gppackage;
kernel = nothing, # use default squared exponential kernel
prediction_type = pred_type,
noise_learn = false,
)

elseif case ["RF-scalar", "RF-scalar-diagin"]
n_features = 10 * Int(floor(sqrt(3 * n_tp)))
kernel_structure =
case == "RF-scalar-diagin" ? SeparableKernel(DiagonalFactor(nugget), OneDimFactor()) :
SeparableKernel(LowRankFactor(3, nugget), OneDimFactor())
mlt = ScalarRandomFeatureInterface(
n_features,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
elseif case ["RF-vector-svd-nonsep"]
kernel_structure = NonseparableKernel(LowRankFactor(6, nugget))
n_features = 500

mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
elseif case ["RF-vector-nosvd-nonsep"]
kernel_structure = NonseparableKernel(LowRankFactor(3, nugget))
n_features = 500
decorrelate = false # don't do SVD
mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
elseif case ["RF-vector-nosvd-sep"]
kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget))
n_features = 500
decorrelate = false # don't do SVD
mlt = VectorRandomFeatureInterface(
n_features,
3,
3,
rng = rng,
kernel_structure = kernel_structure,
optimizer_options = overrides,
)
end

# Emulate
emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate)
optimize_hyperparameters!(emulator)


# Predict with emulator
u_test_tmp = zeros(3, length(xspan_test))
u_test_tmp[:, 1] = sol_test.u[1]

for i in 1:(length(xspan_test) - 1)
rf_mean, _ = predict(emulator, u_test_tmp[:, i:i], transform_to_real = true) # 3x1 matrix
u_test_tmp[:, i + 1] = rf_mean
end

train_err_tmp = [0.0]
for i in 1:size(input, 2)
train_mean, _ = predict(emulator, input[:, i:i], transform_to_real = true) # 3x1
train_err_tmp[1] += norm(train_mean - output[:, i])
end
println("normalized L^2 error on training data:", 1 / size(input, 2) * train_err_tmp[1])

u_hist_tmp = zeros(3, length(xspan_hist))
u_hist_tmp[:, 1] = sol_hist.u[1] # start at end of previous sim

for i in 1:(length(xspan_hist) - 1)
rf_mean, _ = predict(emulator, u_hist_tmp[:, i:i], transform_to_real = true) # 3x1 matrix
u_hist_tmp[:, i + 1] = rf_mean
end

push!(train_err, train_err_tmp)
push!(u_test, u_test_tmp)
push!(u_hist, u_hist_tmp)

# plots for the first repeat
if rep_idx == 1
# plotting trace
f = Figure(resolution = (900, 450))
axx = Axis(f[1, 1], xlabel = "time", ylabel = "x")
axy = Axis(f[2, 1], xlabel = "time", ylabel = "y")
axz = Axis(f[3, 1], xlabel = "time", ylabel = "z")

xx = 0:dt:tmax_test
lines!(axx, xx, u_test_tmp[1, :], color = :blue)
lines!(axy, xx, u_test_tmp[2, :], color = :blue)
lines!(axz, xx, u_test_tmp[3, :], color = :blue)

lines!(axx, xspan_test, solplot[1, :], color = :orange)
lines!(axy, xspan_test, solplot[2, :], color = :orange)
lines!(axz, xspan_test, solplot[3, :], color = :orange)

current_figure()
# save
save("l63_test.png", f, px_per_unit = 3)
save("l63_test.pdf", f, pt_per_unit = 3)

# plot attractor
f3 = Figure()
lines(f3[1, 1], u_test_tmp[1, :], u_test_tmp[3, :], color = :blue)
lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange)

# save
save("l63_attr.png", f3, px_per_unit = 3)
save("l63_attr.pdf", f3, pt_per_unit = 3)

# plotting histograms
f2 = Figure()
hist(f2[1, 1], u_hist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))
hist(f2[1, 2], u_hist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))
hist(f2[1, 3], u_hist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))

hist!(f2[1, 1], solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))
hist!(f2[1, 2], solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))
hist!(f2[1, 3], solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))

# save
save("l63_pdf.png", f2, px_per_unit = 3)
save("l63_pdf.pdf", f2, pt_per_unit = 3)
end

# Predict with emulator
xspan_test = 0.0:dt:tmax_test
u_test = zeros(3, length(xspan_test))
u_test[:, 1] = sol_test.u[1]

for i in 1:(length(xspan_test) - 1)
rf_mean, _ = predict(emulator, u_test[:, i:i], transform_to_real = true) # 3x1 matrix
u_test[:, i + 1] = rf_mean
end
train_err = [0.0]
for i in 1:size(input, 2)
train_mean, _ = predict(emulator, input[:, i:i], transform_to_real = true) # 3x1
train_err[1] += norm(train_mean - output[:, i])
end
println("normalized L^2 error on training data:", 1 / size(input, 2) * train_err[1])



# plotting trace
f = Figure(resolution = (900, 450))
axx = Axis(f[1, 1], xlabel = "time", ylabel = "x")
axy = Axis(f[2, 1], xlabel = "time", ylabel = "y")
axz = Axis(f[3, 1], xlabel = "time", ylabel = "z")

xx = 0:dt:tmax_test
lines!(axx, xx, u_test[1, :], color = :blue)
lines!(axy, xx, u_test[2, :], color = :blue)
lines!(axz, xx, u_test[3, :], color = :blue)

# run test data
solplot = zeros(3, length(xspan_test))
for i in 1:length(xspan_test)
solplot[:, i] = sol_test.u[i] #noiseless
end
# save data
JLD2.save("l63_trainerr.jld2", "train_err", train_err)
JLD2.save("l63_histdata.jld2", "solhist", solhist, "uhist", u_hist)
JLD2.save("l63_testdata.jld2", "solplot", solplot, "uplot", u_test)

lines!(axx, xspan_test, solplot[1, :], color = :orange)
lines!(axy, xspan_test, solplot[2, :], color = :orange)
lines!(axz, xspan_test, solplot[3, :], color = :orange)

current_figure()
# save
save("l63_test.png", f, px_per_unit = 3)
save("l63_test.pdf", f, pt_per_unit = 3)

# plot attractor
f3 = Figure()
lines(f3[1, 1], u_test[1, :], u_test[3, :], color = :blue)
lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange)
# compare marginal histograms to truth - rough measure of fit
sol_cdf = sort(solhist, dims = 2)

# save
save("l63_attr.png", f3, px_per_unit = 3)
save("l63_attr.pdf", f3, pt_per_unit = 3)
u_cdf = []
for u in u_hist
u_cdf_tmp = sort(u, dims = 2)
push!(u_cdf, u_cdf_tmp)
end

f4 = Figure(resolution = (900, Int(floor(900 / 1.618))))
axx = Axis(f4[1, 1], xlabel = "", ylabel = "x")
axy = Axis(f4[1, 2], xlabel = "", ylabel = "y")
axz = Axis(f4[1, 3], xlabel = "", ylabel = "z")

# Predict with emulator for histogram
xspan_hist = 0.0:dt:tmax_hist
u_hist = zeros(3, length(xspan_hist))
u_hist[:, 1] = sol_hist.u[1] # start at end of previous sim
unif_samples = (1:size(sol_cdf, 2)) / size(sol_cdf, 2)

for i in 1:(length(xspan_hist) - 1)
rf_mean, _ = predict(emulator, u_hist[:, i:i], transform_to_real = true) # 3x1 matrix
u_hist[:, i + 1] = rf_mean
for u in u_cdf
lines!(axx, u[1, :], unif_samples, color = (:blue, 0.2), linewidth = 4)
lines!(axy, u[2, :], unif_samples, color = (:blue, 0.2), linewidth = 4)
lines!(axz, u[3, :], unif_samples, color = (:blue, 0.2), linewidth = 4)
end

solhist = zeros(3, length(xspan_hist))
for i in 1:length(xspan_hist)
solhist[:, i] = sol_hist.u[i] #noiseless
end
JLD2.save("l63_histdata.jld2", "solhist", solhist, "uhist", u_hist)
lines!(axx, sol_cdf[1, :], unif_samples, color = (:orange, 1.0), linewidth = 4)
lines!(axy, sol_cdf[2, :], unif_samples, color = (:orange, 1.0), linewidth = 4)
lines!(axz, sol_cdf[3, :], unif_samples, color = (:orange, 1.0), linewidth = 4)

# plotting histograms
f2 = Figure()
hist(f2[1, 1], u_hist[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))
hist(f2[1, 2], u_hist[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))
hist(f2[1, 3], u_hist[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5))

hist!(f2[1, 1], solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))
hist!(f2[1, 2], solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))
hist!(f2[1, 3], solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5))

# save
save("l63_pdf.png", f2, px_per_unit = 3)
save("l63_pdf.pdf", f2, pt_per_unit = 3)


save("l63_cdfs.png", f4, px_per_unit = 3)
save("l63_cdfs.pdf", f4, pt_per_unit = 3)

end

Expand Down

0 comments on commit 9227bc2

Please sign in to comment.