Use env for repro tests, improve repro names
Fixes for gpu repro tests, auto-compare all state variables

Improve error message, increment ref counter

Fix zero_dict calls

Fix dict init

Fixes to zero_dict

Improve debug info
charleskawczynski committed Nov 12, 2024
1 parent 682335f commit f802f8b
Showing 19 changed files with 292 additions and 747 deletions.
117 changes: 111 additions & 6 deletions .buildkite/pipeline.yml

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions config/default_configs/default_config.yml
@@ -217,9 +217,6 @@ non_orographic_gravity_wave:
nh_poly:
help: "Horizontal polynomial degree. Note: The number of quadrature points in 1D within each horizontal element is then Nq = <--nh_poly> + 1"
value: 3
reproducibility_test:
help: "(Bool) perform reproducibility test"
value: false
check_conservation:
help: "Check conservation of mass and energy [`false` (default), `true`]"
value: false
@@ -1,6 +1,5 @@
precip_model: "0M"
dt_save_state_to_disk: "2days"
reproducibility_test: true
initial_condition: "MoistBaroclinicWave"
dt: "450secs"
t_end: "10days"
1 change: 0 additions & 1 deletion config/model_configs/diagnostic_edmfx_aquaplanet.yml
@@ -19,6 +19,5 @@ cloud_model: "quadrature_sgs"
precip_model: 1M
dt: 120secs
t_end: 3hours
reproducibility_test: true
toml: [toml/diagnostic_edmfx.toml]
ode_algo: ARS343
1 change: 0 additions & 1 deletion config/model_configs/single_column_precipitation_test.yml
@@ -14,7 +14,6 @@ precip_model: "1M"
vert_diff: "FriersonDiffusion"
implicit_diffusion: true
approximate_linear_solve_iters: 2
reproducibility_test: false
toml: [toml/single_column_precipitation_test.toml]
diagnostics:
- short_name: [hus, clw, cli, husra, hussn, ta, wa]
@@ -16,7 +16,6 @@ cloud_model: "grid_scale"
surface_temperature: "ZonallyAsymmetric"
moist: "equil"
albedo_model: "RegressionFunctionAlbedo"
reproducibility_test: true
aerosol_radiation: true
prescribed_aerosols: ["CB1", "CB2", "DST01", "DST02", "DST03", "DST04", "OC1", "OC2", "SO4"]
toml: [toml/sphere_aquaplanet.toml]
@@ -1,6 +1,5 @@
precip_model: "0M"
dt_save_state_to_disk: "2days"
reproducibility_test: true
initial_condition: "MoistBaroclinicWave"
dt: "450secs"
t_end: "10days"
@@ -9,6 +9,5 @@ t_end: "4days"
vert_diff: true
forcing: "held_suarez"
precip_model: "0M"
reproducibility_test: true
moist: "equil"
toml: [toml/sphere_held_suarez.toml]
15 changes: 4 additions & 11 deletions examples/hybrid/driver.jl
@@ -87,8 +87,8 @@ end
include(
joinpath(@__DIR__, "..", "..", "reproducibility_tests", "mse_tables.jl"),
)
if config.parsed_args["reproducibility_test"]
# Test results against main branch
if get(ENV, "test_reproducibility", "false") == "true"
# Export reproducibility results, to later test against the main branch
include(
joinpath(
@__DIR__,
@@ -98,17 +98,10 @@
"reproducibility_tests.jl",
),
)
@testset "Test reproducibility table entries" begin
mse_keys = sort(collect(keys(all_best_mse[simulation.job_id])))
pcs = collect(Fields.property_chains(sol.u[end]))
for prop_chain in mse_keys
@test prop_chain in pcs
end
end
perform_reproducibility_tests(
export_reproducibility_results(
config.comms_ctx,
simulation.job_id,
sol.u[end],
all_best_mse,
simulation.output_dir,
)
end
17 changes: 7 additions & 10 deletions reproducibility_tests/README.md
@@ -64,9 +64,8 @@ To update the mse tables:

To add a new reproducibility test:

- Set the command-line `reproducibility_test` to true, and add `julia --color=yes --project=examples reproducibility_tests/test_mse.jl --job_id [job_id] --out_dir [job_id]` as a separate command for the new (or existing) job
- Copy the `all_best_mse` dict template from the job's log
- Paste the `all_best_mse` dict template into `reproducibility_test/mse_tables.jl`
- Add `julia --color=yes --project=examples reproducibility_tests/test_mse.jl --job_id [job_id] --out_dir [job_id]` as a separate command for the new (or existing) job, and set the `test_reproducibility` environment variable for that job (for example, `test_reproducibility: "true"`).
- Add the job's `job_id` to the `reproducibility_test_job_ids` vector in `reproducibility_tests/mse_tables.jl`, as sketched below.
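
For illustration, a minimal sketch of what the `reproducibility_test_job_ids` vector in `mse_tables.jl` might look like after adding a job; the job ids below are placeholders, not entries from the actual table:

```julia
# reproducibility_tests/mse_tables.jl (sketch; job ids are illustrative)
reproducibility_test_job_ids = [
    "sphere_held_suarez_rhoe_equilmoist",  # existing jobs stay as they are
    "my_new_job_id",                       # the newly added job
]
```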

<!-- TODO: improve names / mark off sections for all_best_mse dict -->

@@ -90,19 +89,17 @@ We cannot (easily) compare the output with a reference if we change the spatial

## A detailed procedure of how reproducibility tests are performed

Reproducibility tests are performed at the end of `examples/hybrid/driver.jl`, after a simulation completes, and rely on a unique job id (`job_id`). Here is an outline of the reproducibility test procedure:
Reproducibility results are computed at the end of the `examples/hybrid/driver.jl` script and tested in `reproducibility_tests/test_mse.jl`. This separation lets us compute the results during the simulation job and delay the comparison to a separate test step. Here is an outline of the reproducibility test procedure; a sketch of the driver-side hook follows the list:

0) Run a simulation, with a particular `job_id`, to the final time.
1) Load a dictionary, `all_best_mse`, of previous "best" mean-squared errors from `mse_tables.jl` and extract the mean squared errors for the given `job_id` (store in job-specific dictionary, `best_mse`).
2) Export the solution (a `FieldVector`) at the final simulation time to an `NCDataset` file.
3) Compute the errors between the exported solution and the exported solution from the reference `NCDataset` files (which are saved in dedicated folders on the Caltech Central cluster) and save them into a dictionary called `computed_mse`.
4) Export this dictionary (`computed_mse`) to the output folder.
5) Test that `computed_mse` is no worse than `best_mse` (determines if reproducibility test passes or not).
1) Export the solution (a `FieldVector`) at the final simulation time to an HDF5 file.
2) Compute the mean-squared errors (MSE) of all `FieldVector` variables in the prognostic state against all other comparable references (which are saved in dedicated folders on the Caltech Central cluster).
3) Convert this set of MSEs to a dictionary (called `computed_mse`), and export it to a file in the output folder.
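
As a rough sketch (using the names from the `examples/hybrid/driver.jl` diff above), the driver-side hook that triggers these steps looks roughly like this; the argument comments reflect the procedure described above rather than a documented API:

```julia
# Sketch of the hook at the end of examples/hybrid/driver.jl; `config`,
# `simulation`, and `sol` are the driver's existing objects.
if get(ENV, "test_reproducibility", "false") == "true"
    export_reproducibility_results(
        config.comms_ctx,       # communication context used to read/write HDF5
        simulation.job_id,      # unique job id for this configuration
        sol.u[end],             # prognostic FieldVector at the final time
        simulation.output_dir,  # where the exported results are written
    )
end
```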

After these steps are performed at the end of the driver, additional jobs are run:

1) Print `computed_mse` for all jobs to make updating `reproducibility_tests/mse_tables.jl` easy.
2) If we're on the GitHub merge queue branch (all tests have passed, and the PR is effectively merging), move the `NCDataset`s from the scratch directory onto the dedicated folder on the Caltech Central cluster.
2) If we're on the GitHub merge queue branch (all tests have passed, and the PR is effectively merging), move the HDF5 files from the scratch directory onto the dedicated folder on the Caltech Central cluster.

## How we track which dataset to compare against

184 changes: 63 additions & 121 deletions reproducibility_tests/compute_mse.jl
@@ -5,67 +5,67 @@ import ClimaCoreTempestRemap as CCTR

include("latest_comparable_paths.jl")

function get_nc_data(ds, var::String)
if haskey(ds, var)
return ds[var]
else
for key in keys(ds.group)
if haskey(ds.group[key], var)
return ds.group[key][var]
end
end
"""
to_dict(filename::String, comms_ctx)
Convert the HDF5 file containing the
prognostic field `Y` into a `Dict`
using ClimaCore's `property_chains` and
`single_field` functions.
"""
function to_dict(filename::String, comms_ctx)
dict = Dict{Any, AbstractArray}()
reader = InputOutput.HDF5Reader(filename, comms_ctx)
Y = InputOutput.read_field(reader, "Y")
Base.close(reader)
for prop_chain in Fields.property_chains(Y)
dict[prop_chain] =
vec(Array(parent(Fields.single_field(Y, prop_chain))))
end
error("No key $var for mse computation.")
return nothing
return dict
end

"""
to_dict(nc_filename::String, reference_keys::Vector{String})
zero_dict(filename::String, comms_ctx)
Convert an NCDatasets file to a `Dict`.
Return a dict of zeros for all `ClimaCore.Fields.property_chains`
in the fieldvector `Y` contained in the HDF5 file `filename`.
"""
function to_dict(nc_filename::String, reference_keys::Vector{String})
dict = Dict{String, AbstractArray}()
NCDatasets.Dataset(nc_filename, "r") do ds
for key in reference_keys
dict[key] = vec(Array(get_nc_data(ds, key)))
end
function zero_dict(filename::String, comms_ctx)
dict = Dict{Any, AbstractArray}()
reader = InputOutput.HDF5Reader(filename, comms_ctx)
Y = InputOutput.read_field(reader, "Y")
Base.close(reader)
for prop_chain in Fields.property_chains(Y)
dict[prop_chain] =
vec(Array(parent(Fields.single_field(Y, prop_chain)))) .* 0
end
return dict
end

"""
reproducibility_test(;
reproducibility_results(
comms_ctx;
job_id,
reference_mse,
ds_filename_computed,
ds_filename_reference = nothing,
varname,
)
Returns a `Dict` of mean-squared errors between
`NCDataset`s `ds_filename_computed` and
`ds_filename_reference` for all keys in `reference_mse`.
Keys in `reference_mse` may directly map to keys in
the `NCDataset`s, or they may be mapped to the keys
via `varname`.
datasets `ds_filename_computed` and
`ds_filename_reference` for all variables.
If running on buildkite, we get `ds_filename_reference`
from the latest merged dataset on Caltech central.
"""
function reproducibility_test(;
job_id,
reference_mse,
ds_filename_computed,
varname,
)
function reproducibility_results(comms_ctx; job_id, ds_filename_computed)
local ds_filename_reference
reference_keys = map(k -> varname(k), collect(keys(reference_mse)))
paths = String[] # initialize for later handling

if haskey(ENV, "BUILDKITE_COMMIT")
paths = latest_comparable_paths(10)
isempty(paths) && return (reference_mse, paths)
isempty(paths) &&
return (zero_dict(ds_filename_computed, comms_ctx), paths)
@info "`ds_filename_computed`: `$ds_filename_computed`"
ds_filename_references =
map(p -> joinpath(p, ds_filename_computed), paths)
@@ -94,40 +94,41 @@ function reproducibility_test(;
@warn "There is no reference dataset, and no NC tar file."
end
end
if !isfile(ds_filename_reference)
msg = "\n\n"
msg *= "Pull request author:\n"
msg *= " It seems that a new dataset,\n"
msg *= "\n"
msg *= "dataset file:`$(ds_filename_computed)`,"
msg *= "\n"
msg *= " was created, or the name of the dataset\n"
msg *= " has changed. Please increment the reference\n"
msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n"
msg *= "\n"
msg *= " If this is not the case, then please\n"
msg *= " open an issue with a link pointing to this\n"
msg *= " PR and build.\n"
msg *= "\n"
msg *= "For more information, please find\n"
msg *= "`reproducibility_tests/README.md` and read the section\n\n"
msg *= " `How to merge pull requests (PR) that get approved\n"
msg *= " but *break* reproducibility tests`\n\n"
msg *= "for how to merge this PR."
error(msg)
end
end
non_existent_files = filter(x -> !isfile(x), ds_filename_references)
if !isempty(non_existent_files)
msg = "\n\n"
msg *= "Pull request author:\n"
msg *= " It seems that a new dataset,\n"
msg *= "\n"
msg *= "dataset file(s):`$(non_existent_files)`,"
msg *= "\n"
msg *= " was created, or the name of the dataset\n"
msg *= " has changed. Please increment the reference\n"
msg *= " counter in `reproducibility_tests/ref_counter.jl`.\n"
msg *= "\n"
msg *= " If this is not the case, then please\n"
msg *= " open an issue with a link pointing to this\n"
msg *= " PR and build.\n"
msg *= "\n"
msg *= "For more information, please find\n"
msg *= "`reproducibility_tests/README.md` and read the section\n\n"
msg *= " `How to merge pull requests (PR) that get approved\n"
msg *= " but *break* reproducibility tests`\n\n"
msg *= "for how to merge this PR."
error(msg)
end
else
@warn "Buildkite not detected. Skipping reproducibility tests."
@info "Please review output results before merging."
return (reference_mse, paths)
return (zero_dict(ds_filename_computed, comms_ctx), paths)
end

local computed_mse
@info "Prescribed reference keys $reference_keys"
dict_computed = to_dict(ds_filename_computed, reference_keys)
dict_references =
map(ds -> to_dict(ds, reference_keys), ds_filename_references)
dict_computed = to_dict(ds_filename_computed, comms_ctx)
dict_references = map(ds -> to_dict(ds, comms_ctx), ds_filename_references)
reference_keys = keys(first(dict_references))
@info "Reference keys $reference_keys"
@info "Computed keys $(collect(keys(dict_computed)))"
@info "Reference keys $(collect(keys(first(dict_references))))"
if all(dr -> keys(dict_computed) == keys(dr), dict_references) && all(
@@ -153,62 +153,3 @@
return (computed_mses, paths)

end


##### TODO: move below functions to ClimaCore

function first_center_space(fv::Fields.FieldVector)
for prop_chain in Fields.property_chains(fv)
f = Fields.single_field(fv, prop_chain)
space = axes(f)
if space isa Spaces.CenterExtrudedFiniteDifferenceSpace
return space
end
end
error("Unfound space")
end

function first_face_space(fv::Fields.FieldVector)
for prop_chain in Fields.property_chains(fv)
f = Fields.single_field(fv, prop_chain)
space = axes(f)
if space isa Spaces.FaceExtrudedFiniteDifferenceSpace
return space
end
end
error("Unfound space")
end

function export_nc(
Y::Fields.FieldVector;
nc_filename,
t_now = 0.0,
center_space = first_center_space,
face_space = first_face_space,
filter_prop_chain = pn -> true, # use all fields
varname::Function,
)
prop_chains = Fields.property_chains(Y)
filter!(filter_prop_chain, prop_chains)
cspace = center_space(Y)
fspace = face_space(Y)
# create a temporary dir for intermediate data
FT = eltype(Y)
NCDatasets.NCDataset(nc_filename, "c") do nc
# defines the appropriate dimensions and variables for a space coordinate
# defines the appropriate dimensions and variables for a time coordinate (by default, unlimited size)
nc_time = CCTR.def_time_coord(nc)
CCTR.def_space_coord(nc, cspace, type = "cgll")
CCTR.def_space_coord(nc, fspace, type = "cgll")
# define variables for the prognostic states
for prop_chain in Fields.property_chains(Y)
f = Fields.single_field(Y, prop_chain)
space = axes(f)
nc_var = CCTR.defVar(nc, varname(prop_chain), FT, space, ("time",))
nc_var[:, 1] = f
end
# TODO: interpolate w onto center space and save it the same way as the other vars
nc_time[1] = t_now
end
return nothing
end
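
For reference, a hypothetical call to the new `reproducibility_results` function might look like the following; the job id and file name are placeholders, and the exact shape of the returned MSE data is not spelled out in this diff:

```julia
# Hypothetical usage (placeholder names); compares the locally exported
# prognostic state against the latest comparable reference datasets.
computed_mses, paths = reproducibility_results(
    comms_ctx;
    job_id = "my_job_id",
    ds_filename_computed = "prog_state.hdf5",
)
# `paths` lists the reference directories that were compared against;
# `computed_mses` holds the mean-squared errors of the prognostic
# variables (keyed by property chain) against that reference data.
```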
10 changes: 3 additions & 7 deletions reproducibility_tests/move_output.jl
@@ -2,12 +2,8 @@
include(joinpath(@__DIR__, "latest_comparable_paths.jl"))
paths = latest_comparable_paths()

all_lines = readlines(joinpath(@__DIR__, "mse_tables.jl"))
lines = deepcopy(all_lines)
filter!(x -> occursin("] = OrderedCollections", x), lines)
job_ids = getindex.(split.(lines, "\""), 2)
@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
@assert length(job_ids) > 0 # safety net
include(joinpath(@__DIR__, "mse_tables.jl"))
job_ids = reproducibility_test_job_ids

# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
@@ -21,7 +17,7 @@ if buildkite_ci
@info "commit = $(commit)"

using Glob
@show readdir(joinpath(@__DIR__, ".."))
# @show readdir(joinpath(@__DIR__, ".."))
# if a contributor manually merged, we still want to move data
# from scratch to `cluster_data_prefix`. So, let's also try moving
# data if this is running on the main branch.