Skip to content

Commit

Permalink
Separate out ExperimentConfig, update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
nefrathenrici committed May 31, 2024
1 parent 0004175 commit f73ac0a
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 64 deletions.
33 changes: 18 additions & 15 deletions src/backends.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ abstract type AbstractBackend end
# Backend tag for running a calibration directly in Julia.
# `get_backend` starts from this type as its default choice.
struct JuliaBackend <: AbstractBackend end
# Backend tag for calibrations whose forward model runs are scheduled
# on Caltech's HPC cluster.
struct CaltechHPC <: AbstractBackend end

"""
get_backend()
Determine the appropriate backend using relevant system information.
"""
function get_backend()
backend = JuliaBackend
if isfile("/etc/redhat-release") &&
Expand All @@ -13,13 +18,15 @@ function get_backend()
end

"""
calibrate(config::ExperimentConfig)
calibrate(experiment_dir::AbstractString)
calibrate(::Type{JuliaBackend}, config::ExperimentConfig)
calibrate(::Type{JuliaBackend}, experiment_dir::AbstractString)
Run a calibration in Julia. Takes an ExperimentConfig or an experiment folder.
Run a calibration in Julia.
This function is intended for use in a larger workflow, assuming that all related
model interfaces and data generation scripts are properly aligned with the configuration.
Takes an ExperimentConfig or an experiment folder.
If no backend is passed, one is chosen via `get_backend`.
This function is intended for use in a larger workflow, assuming that all needed
model interface and observation map functions are set up for the calibration.
# Example
Run: `julia --project=experiments/surface_fluxes_perfect_model`
Expand Down Expand Up @@ -66,18 +73,14 @@ end
calibrate(::Type{CaltechHPC}, config::ExperimentConfig; kwargs...)
calibrate(::Type{CaltechHPC}, experiment_dir; kwargs...)
Runs a full calibration, scheduling the forward model runs on Caltech's HPC cluster.
Run a full calibration, scheduling the forward model runs on Caltech's HPC cluster.
Takes either an ExperimentConfig for an experiment folder.
Takes either an ExperimentConfig or an experiment folder.
# Keyword Arguments
- `experiment_dir::AbstractString`: Directory containing experiment configurations.
- `model_interface::AbstractString`: Path to the model interface file.
- `time_limit::AbstractString`: Time limit for Slurm jobs.
- `ntasks::Int`: Number of tasks to run in parallel.
- `cpus_per_task::Int`: Number of CPUs per Slurm task.
- `gpus_per_task::Int`: Number of GPUs per Slurm task.
- `partition::AbstractString`: Slurm partition to use.
- `experiment_dir`: Directory containing experiment configurations.
- `model_interface`: Path to the model interface file.
- `slurm_kwargs`: Dictionary of slurm arguments, passed through to `sbatch`.
- `verbose::Bool`: Enable verbose output for debugging.
# Usage
Expand Down Expand Up @@ -113,7 +116,7 @@ function calibrate(
joinpath(experiment_dir, "..", "..", "model_interface.jl"),
),
verbose = false,
slurm_kwargs = Dict(:time_limit => 45),
slurm_kwargs = Dict(:time_limit => 45, :ntasks => 1),
)
# ExperimentConfig is created from a YAML file within the experiment_dir
(; n_iterations, output_dir, ensemble_size) = config
Expand Down
82 changes: 56 additions & 26 deletions src/ekp_interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,37 @@ using EnsembleKalmanProcesses.TOMLInterface
export ExperimentConfig

"""
ExperimentConfig(
n_iterations::Integer,
ensemble_size::Integer,
observations,
noise,
prior::ParameterDistribution,
output_dir,
)
ExperimentConfig(filepath::AbstractString; kwargs...)
Constructs an ExperimentConfig from a given YAML file or directory containing 'experiment_config.yml'.
Construct an ExperimentConfig from a given YAML file or directory containing 'experiment_config.yml'.
ExperimentConfig holds the configuration for a calibration experiment.
This can be constructed from a YAML configuration file or directly using individual parameters.
"""
struct ExperimentConfig
Base.@kwdef struct ExperimentConfig
n_iterations::Integer
ensemble_size::Integer
observations::Any
noise::Any
prior::ParameterDistribution
output_dir::Any
emulate_sample::Bool
end

function ExperimentConfig(filepath::AbstractString; kwargs...)
is_yaml_file(f) = isfile(f) && endswith(f, ".yml")

Check warning on line 37 in src/ekp_interface.jl

View check run for this annotation

Codecov / codecov/patch

src/ekp_interface.jl#L37

Added line #L37 was not covered by tests
filepath_extension = joinpath(filepath, "experiment_config.yml")
if endswith(filepath, ".yml") && isfile(filepath)
if is_yaml_file(filepath)

Check warning on line 39 in src/ekp_interface.jl

View check run for this annotation

Codecov / codecov/patch

src/ekp_interface.jl#L39

Added line #L39 was not covered by tests
config_dict = YAML.load_file(filepath)
experiment_dir = dirname(filepath)
elseif isdir(filepath) &&
isfile(filepath_extension) &&
endswith(filepath_extension, ".yml")
elseif isdir(filepath) && is_yaml_file(filepath_extension)

Check warning on line 42 in src/ekp_interface.jl

View check run for this annotation

Codecov / codecov/patch

src/ekp_interface.jl#L42

Added line #L42 was not covered by tests
config_dict = YAML.load_file(filepath_extension)
experiment_dir = filepath
else
Expand Down Expand Up @@ -60,14 +67,13 @@ function ExperimentConfig(filepath::AbstractString; kwargs...)
joinpath(experiment_dir, config_dict["prior"])
prior = get_prior(prior_path)

return ExperimentConfig(
return ExperimentConfig(;

Check warning on line 70 in src/ekp_interface.jl

View check run for this annotation

Codecov / codecov/patch

src/ekp_interface.jl#L70

Added line #L70 was not covered by tests
n_iterations,
ensemble_size,
observations,
noise,
prior,
output_dir,
get(config_dict, "emulate_sample", false);
kwargs...,
)
end
Expand Down Expand Up @@ -134,18 +140,16 @@ end

"""
save_G_ensemble(config::ExperimentConfig, iteration, G_ensemble)
save_G_ensemble(filepath, iteration, G_ensemble)
save_G_ensemble(output_dir::AbstractString, iteration, G_ensemble)
Saves the ensemble's observation map output to the correct directory based on the provided configuration.
Takes either an `ExperimentConfig` or a string used to construct an `ExperimentConfig`.
Takes an output directory, either extracted from an ExperimentConfig or passed directly.
"""
function save_G_ensemble(filepath, iteration, G_ensemble)
config = ExperimentConfig(filepath)
return save_G_ensemble(config, iteration, G_ensemble)
end
function save_G_ensemble(config::ExperimentConfig, iteration, G_ensemble)
    # Only the output directory of the configuration is needed; hand off to
    # the method that takes the directory directly.
    output_dir = config.output_dir
    return save_G_ensemble(output_dir, iteration, G_ensemble)
end

function save_G_ensemble(config::ExperimentConfig, iteration, G_ensemble)
iter_path = path_to_iteration(config.output_dir, iteration)
function save_G_ensemble(output_dir::AbstractString, iteration, G_ensemble)
    # Write the ensemble output into the folder belonging to this iteration
    # and hand the ensemble back to the caller unchanged.
    ensemble_file =
        joinpath(path_to_iteration(output_dir, iteration), "G_ensemble.jld2")
    JLD2.save_object(ensemble_file, G_ensemble)
    return G_ensemble
end
Expand Down Expand Up @@ -183,21 +187,44 @@ function env_member_number(env = ENV)
end

"""
initialize(
ensemble_size,
observations,
noise,
prior,
output_dir;
rng_seed = 1234,
)
initialize(config::ExperimentConfig; rng_seed = 1234)
initialize(filepath::AbstractString; rng_seed = 1234)
Initializes the calibration process by setting up the EnsembleKalmanProcess object
and parameter files with a given seed for random number generation.
Takes an `ExperimentConfig`, a string used to construct an `ExperimentConfig`, or the individual configuration values directly.
"""
function initialize(filepath::AbstractString; kwargs...)
    # Build the configuration from the given path, then forward every
    # keyword argument to the ExperimentConfig method.
    config = ExperimentConfig(filepath)
    return initialize(config; kwargs...)
end

function initialize(config::ExperimentConfig; rng_seed = 1234)

function initialize(config::ExperimentConfig; kwargs...)
    # Unpack the fields the positional method expects and forward all
    # keyword arguments untouched.
    (; ensemble_size, observations, noise, prior, output_dir) = config
    return initialize(
        ensemble_size,
        observations,
        noise,
        prior,
        output_dir;
        kwargs...,
    )
end

function initialize(
ensemble_size,
observations,
noise,
prior,
output_dir;
rng_seed = 1234,
)
Random.seed!(rng_seed)
rng_ekp = Random.MersenneTwister(rng_seed)

(; observations, ensemble_size, noise, prior, output_dir) = config
initial_ensemble =
EKP.construct_initial_ensemble(rng_ekp, prior, ensemble_size)
eki = EKP.EnsembleKalmanProcess(
Expand Down Expand Up @@ -227,16 +254,19 @@ function initialize(config::ExperimentConfig; rng_seed = 1234)
end

"""
update_ensemble(config_file, iteration)
update_ensemble(ExperimentConfig, iteration)
update_ensemble(output_dir::AbstractString, iteration, prior)
update_ensemble(config::ExperimentConfig, iteration)
update_ensemble(config_file::AbstractString, iteration)
Updates the Ensemble Kalman Process object and saves the parameters for the next iteration.
Updates the EnsembleKalmanProcess object and saves the parameters for the next iteration.
"""
update_ensemble(config_file, iteration) =
update_ensemble(config_file::AbstractString, iteration) =

Check warning on line 263 in src/ekp_interface.jl

View check run for this annotation

Codecov / codecov/patch

src/ekp_interface.jl#L263

Added line #L263 was not covered by tests
update_ensemble(ExperimentConfig(config_file), iteration)

function update_ensemble(configuration::ExperimentConfig, iteration)
(; prior, output_dir) = configuration
function update_ensemble(configuration::ExperimentConfig, iteration)
    # Pull out the two fields the directory-based method needs.
    (; output_dir, prior) = configuration
    return update_ensemble(output_dir, iteration, prior)
end

function update_ensemble(output_dir::AbstractString, iteration, prior)
# Load EKI object from iteration folder
iter_path = path_to_iteration(output_dir, iteration)
eki = JLD2.load_object(joinpath(iter_path, "eki_file.jld2"))
Expand Down
10 changes: 6 additions & 4 deletions src/model_interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@ set_up_forward_model(member, iteration, experiment_config::ExperimentConfig) =
error("set_up_forward_model not implemented")

"""
run_forward_model(config)
run_forward_model(model_config)
Execute the forward model simulation with the given configuration.
Executes the forward model simulation with the given configuration.
`config` should be obtained from `set_up_forward_model`.
This function should be overridden with model-specific implementation details.
`model_config` should be obtained from `set_up_forward_model`:
`run_forward_model(set_up_forward_model(member, iter, experiment_config))`
"""
function run_forward_model(model_config)
    # Generic fallback: always raises, whatever `model_config` is.
    throw(ErrorException("run_forward_model not implemented"))
end

"""
observation_map(val:Val, iteration)
observation_map(iteration)
Runs the observation map for the specified iteration.
This function must be implemented for each calibration experiment.
Expand Down
48 changes: 32 additions & 16 deletions src/slurm.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@

"""
    kwargs(; kwargs...)

Collect the given keyword arguments into a `Dict{Symbol, Any}`,
keyed by keyword name.
"""
function kwargs(; options...)
    collected = Dict{Symbol, Any}()
    for (name, value) in options
        collected[name] = value
    end
    return collected
end

"""
generate_sbatch_script
generate_sbatch_script(
iter, member,
output_dir, experiment_dir, model_interface;
module_load_str, slurm_kwargs,
)
Generate a string containing an sbatch script to run the forward model.
Helper function for `sbatch_model_run`.
"""
function generate_sbatch_script(
iter,
member,
output_dir,
experiment_dir,
model_interface;
module_load = """
export MODULEPATH=/groups/esm/modules:\$MODULEPATH
module purge
module load climacommon/2024_05_27
""",
slurm_kwargs = Dict{Symbol, Any}(
:time => 45,
:ntasks => 1,
:cpus_per_task => 1,
),
module_load_str,
slurm_kwargs,
)
member_log = path_to_model_log(output_dir, iter, member)

Expand All @@ -39,7 +36,7 @@ function generate_sbatch_script(
#SBATCH --output=$member_log
$slurm_directives_str
$module_load
$module_load_str
srun --output=$member_log --open-mode=append julia --project=$experiment_dir -e '
import ClimaCalibrate as CAL
Expand All @@ -63,15 +60,33 @@ end
slurm_kwargs,
)
Construct and execute a command to run a model simulation on a Slurm cluster for a single ensemble member.
Construct and execute a command to run a forward model on a Slurm cluster for a single ensemble member.
Arguments:
- iter: Iteration number
- member: Member number
- output_dir: Calibration experiment output directory
- experiment_dir: Directory containing the experiment's Project.toml
- model_interface: File containing the model interface
- module_load_str: Commands which load the necessary modules
- slurm_kwargs: Dictionary containing the slurm resources for the job. Easily generated using `kwargs`.
"""
function sbatch_model_run(
iter,
member,
output_dir,
experiment_dir,
model_interface;
slurm_kwargs = Dict{Symbol, Any}(),
slurm_kwargs = Dict{Symbol, Any}(
:time => 45,
:ntasks => 1,
:cpus_per_task => 1,
),
module_load_str = """
export MODULEPATH=/groups/esm/modules:\$MODULEPATH
module purge
module load climacommon/2024_05_27
""",
kwargs...,
)
sbatch_contents = generate_sbatch_script(
Expand All @@ -81,6 +96,7 @@ function sbatch_model_run(
experiment_dir,
model_interface;
slurm_kwargs,
module_load_str,
kwargs...,
)

Expand All @@ -105,7 +121,7 @@ function wait_for_jobs(
completed_jobs = Set{Int}()

try
while !all(job_completed, statuses)
while length(completed_jobs) < length(statuses)

Check warning on line 124 in src/slurm.jl

View check run for this annotation

Codecov / codecov/patch

src/slurm.jl#L124

Added line #L124 was not covered by tests
for (m, status) in enumerate(statuses)
m in completed_jobs && continue

Expand Down
1 change: 0 additions & 1 deletion test/ekp_interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ config = CAL.ExperimentConfig(
noise,
prior,
output_dir,
false,
)

CAL.initialize(config)
Expand Down
1 change: 0 additions & 1 deletion test/model_interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ using Test
[1],
ClimaCalibrate.get_prior(prior_path),
"output",
false,
)
@test_throws ErrorException("set_up_forward_model not implemented") ClimaCalibrate.set_up_forward_model(
1,
Expand Down
1 change: 0 additions & 1 deletion test/pure_julia_e2e.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ experiment_config = ExperimentConfig(
noise,
prior,
output_dir,
false,
)

# Model interface
Expand Down
5 changes: 5 additions & 0 deletions test/slurm_unit_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ sbatch_file = CAL.generate_sbatch_script(
EXPERIMENT_DIR,
MODEL_INTERFACE;
slurm_kwargs,
module_load_str = """
export MODULEPATH=/groups/esm/modules:\$MODULEPATH
module purge
module load climacommon/2024_05_27
""",
)

expected_sbatch_contents = """
Expand Down

0 comments on commit f73ac0a

Please sign in to comment.