diff --git a/README.md b/README.md index c03fd91069..f8b114792d 100644 --- a/README.md +++ b/README.md @@ -100,3 +100,5 @@ https://github.com/CliMA/Land. [downloads-img]: https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Ftotal_downloads%2FClimaLand&query=total_requests&suffix=%2Ftotal&label=Downloads [downloads-url]: http://juliapkgstats.com/pkg/ClimaLand + + diff --git a/experiments/benchmarks/land.jl b/experiments/benchmarks/land.jl index 8d58dafc01..4f47048478 100644 --- a/experiments/benchmarks/land.jl +++ b/experiments/benchmarks/land.jl @@ -572,74 +572,79 @@ function setup_simulation(; greet = false) ) return prob, ode_algo, Δt, cb end - +@info now() # Warm up and greet prob, ode_algo, Δt, cb = setup_simulation(; greet = true) SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) - -@info "Starting profiling" -# Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES -MAX_PROFILING_TIME_SECONDS = 500 -MAX_PROFILING_SAMPLES = 100 -time_now = time() -timings_s = Float64[] -while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && - length(timings_s) < MAX_PROFILING_SAMPLES - lprob, lode_algo, lΔt, lcb = setup_simulation() - push!( - timings_s, - ClimaComms.@elapsed device SciMLBase.solve( - lprob, - lode_algo; - dt = lΔt, - callback = lcb, - ) - ) -end -num_samples = length(timings_s) -average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) -max_timing_s = round(maximum(timings_s), sigdigits = 3) -min_timing_s = round(minimum(timings_s), sigdigits = 3) -std_timing_s = round( - sqrt(sum(((timings_s .- average_timing_s) .^ 2) / num_samples)), - sigdigits = 3, -) -@info "Num samples: $num_samples" -@info "Average time: $(average_timing_s) s" -@info "Max time: $(max_timing_s) s" -@info "Min time: $(min_timing_s) s" -@info "Standard deviation time: $(std_timing_s) s" -@info "Done profiling" - -prob, ode_algo, Δt, cb = setup_simulation() -Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) -results = Profile.fetch() -flame_file = joinpath(outdir, "flame_$device_suffix.html") -ProfileCanvas.html_file(flame_file, results) -@info "Saved compute flame to $flame_file" - -prob, ode_algo, Δt, cb = setup_simulation() -Profile.Allocs.@profile sample_rate = 0.005 SciMLBase.solve( - prob, - ode_algo; - dt = Δt, - callback = cb, -) -results = Profile.Allocs.fetch() -profile = ProfileCanvas.view_allocs(results) -alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") -ProfileCanvas.html_file(alloc_flame_file, profile) -@info "Saved allocation flame to $alloc_flame_file" - +@info now() +# @info "Starting profiling" +# # Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES +# MAX_PROFILING_TIME_SECONDS = 500 +# MAX_PROFILING_SAMPLES = 100 +# time_now = time() +# timings_s = Float64[] +# while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && +# length(timings_s) < MAX_PROFILING_SAMPLES +# lprob, lode_algo, lΔt, lcb = setup_simulation() +# push!( +# timings_s, +# ClimaComms.@elapsed device SciMLBase.solve( +# lprob, +# lode_algo; +# dt = lΔt, +# callback = lcb, +# ) +# ) +# end +# num_samples = length(timings_s) +# average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) +# max_timing_s = round(maximum(timings_s), sigdigits = 3) +# min_timing_s = round(minimum(timings_s), sigdigits = 3) +# std_timing_s = round( +# sqrt(sum(((timings_s .- average_timing_s) .^ 2) / num_samples)), +# sigdigits = 3, +# ) +# @info "Num samples: $num_samples" +# @info "Average time: $(average_timing_s) s" +# @info "Max time: $(max_timing_s) s" +# @info "Min time: $(min_timing_s) s" +# @info "Standard deviation time: $(std_timing_s) s" +# @info "Done profiling" +# @info now() +# prob, ode_algo, Δt, cb = setup_simulation() +# Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) +# results = Profile.fetch() +# flame_file = joinpath(outdir, "flame_$device_suffix.html") +# ProfileCanvas.html_file(flame_file, results) +# @info "Saved compute flame to $flame_file" +# @info now() +# prob, ode_algo, Δt, cb = setup_simulation() +# Profile.Allocs.@profile sample_rate = 0.005 SciMLBase.solve( +# prob, +# ode_algo; +# dt = Δt, +# callback = cb, +# ) +# results = Profile.Allocs.fetch() +# profile = ProfileCanvas.view_allocs(results) +# alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") +# ProfileCanvas.html_file(alloc_flame_file, profile) +# @info "Saved allocation flame to $alloc_flame_file" +@info now() if ClimaComms.device() isa ClimaComms.CUDADevice import CUDA + @info "setting up CUDA simulation" + @info now() lprob, lode_algo, lΔt, lcb = setup_simulation() + @info "profiling cuda simulation" + @info now() p = CUDA.@profile SciMLBase.solve( lprob, lode_algo; dt = lΔt, callback = lcb, ) + @info now() # use "COLUMNS" to set how many horizontal characters to crop: # See https://github.com/ronisbr/PrettyTables.jl/issues/11#issuecomment-2145550354 envs = ("COLUMNS" => 120,) @@ -652,6 +657,7 @@ if ClimaComms.device() isa ClimaComms.CUDADevice ) show(io, p) end + @info now() println() end diff --git a/experiments/benchmarks/richards.jl b/experiments/benchmarks/richards.jl index 41c91101e0..b8b376d049 100644 --- a/experiments/benchmarks/richards.jl +++ b/experiments/benchmarks/richards.jl @@ -310,72 +310,79 @@ function setup_simulation(; greet = false) end # Warm up and greet +@info now() prob, ode_algo, Δt, cb = setup_simulation(; greet = true) SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) -@info "Starting profiling" -# Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES -MAX_PROFILING_TIME_SECONDS = 500 -MAX_PROFILING_SAMPLES = 100 -time_now = time() -timings_s = Float64[] -while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && - length(timings_s) < MAX_PROFILING_SAMPLES - lprob, lode_algo, lΔt, lcb = setup_simulation() - push!( - timings_s, - ClimaComms.@elapsed device SciMLBase.solve( - lprob, - lode_algo; - dt = lΔt, - callback = lcb, - ) - ) -end -num_samples = length(timings_s) -average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) -max_timing_s = round(maximum(timings_s), sigdigits = 3) -min_timing_s = round(minimum(timings_s), sigdigits = 3) -std_timing_s = round( - sqrt(sum(((timings_s .- average_timing_s) .^ 2) / num_samples)), - sigdigits = 3, -) -@info "Num samples: $num_samples" -@info "Average time: $(average_timing_s) s" -@info "Max time: $(max_timing_s) s" -@info "Min time: $(min_timing_s) s" -@info "Standard deviation time: $(std_timing_s) s" -@info "Done profiling" - -prob, ode_algo, Δt, cb = setup_simulation() -Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) -results = Profile.fetch() -flame_file = joinpath(outdir, "flame_$device_suffix.html") -ProfileCanvas.html_file(flame_file, results) -@info "Saved compute flame to $flame_file" - -prob, ode_algo, Δt, cb = setup_simulation() -Profile.Allocs.@profile sample_rate = 0.005 SciMLBase.solve( - prob, - ode_algo; - dt = Δt, - callback = cb, -) -results = Profile.Allocs.fetch() -profile = ProfileCanvas.view_allocs(results) -alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") -ProfileCanvas.html_file(alloc_flame_file, profile) -@info "Saved allocation flame to $alloc_flame_file" - +# @info "Starting profiling" +# @info now() +# # Stop when we profile for MAX_PROFILING_TIME_SECONDS or MAX_PROFILING_SAMPLES +# MAX_PROFILING_TIME_SECONDS = 500 +# MAX_PROFILING_SAMPLES = 100 +# time_now = time() +# timings_s = Float64[] +# while (time() - time_now) < MAX_PROFILING_TIME_SECONDS && +# length(timings_s) < MAX_PROFILING_SAMPLES +# lprob, lode_algo, lΔt, lcb = setup_simulation() +# push!( +# timings_s, +# ClimaComms.@elapsed device SciMLBase.solve( +# lprob, +# lode_algo; +# dt = lΔt, +# callback = lcb, +# ) +# ) +# end +# num_samples = length(timings_s) +# average_timing_s = round(sum(timings_s) / num_samples, sigdigits = 3) +# max_timing_s = round(maximum(timings_s), sigdigits = 3) +# min_timing_s = round(minimum(timings_s), sigdigits = 3) +# std_timing_s = round( +# sqrt(sum(((timings_s .- average_timing_s) .^ 2) / num_samples)), +# sigdigits = 3, +# ) +# @info "Num samples: $num_samples" +# @info "Average time: $(average_timing_s) s" +# @info "Max time: $(max_timing_s) s" +# @info "Min time: $(min_timing_s) s" +# @info "Standard deviation time: $(std_timing_s) s" +# @info "Done profiling" +# @info now() + +# prob, ode_algo, Δt, cb = setup_simulation() +# Profile.@profile SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb) +# results = Profile.fetch() +# flame_file = joinpath(outdir, "flame_$device_suffix.html") +# ProfileCanvas.html_file(flame_file, results) +# @info "Saved compute flame to $flame_file" +# @info now() +# prob, ode_algo, Δt, cb = setup_simulation() +# Profile.Allocs.@profile sample_rate = 0.005 SciMLBase.solve( +# prob, +# ode_algo; +# dt = Δt, +# callback = cb, +# ) +# results = Profile.Allocs.fetch() +# profile = ProfileCanvas.view_allocs(results) +# alloc_flame_file = joinpath(outdir, "alloc_flame_$device_suffix.html") +# ProfileCanvas.html_file(alloc_flame_file, profile) +# @info "Saved allocation flame to $alloc_flame_file" +@info now() if ClimaComms.device() isa ClimaComms.CUDADevice import CUDA + @info now() lprob, lode_algo, lΔt, lcb = setup_simulation() + @info now() + @info "profiling with cuda" p = CUDA.@profile SciMLBase.solve( lprob, lode_algo; dt = lΔt, callback = lcb, ) + @info now() # use "COLUMNS" to set how many horizontal characters to crop: # See https://github.com/ronisbr/PrettyTables.jl/issues/11#issuecomment-2145550354 envs = ("COLUMNS" => 120,)